--- /dev/null
-noinst_PROGRAMS = glsl_compiler
+ #
+ # Copyright © 2012 Jon TURNEY
+ # Copyright (C) 2015 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+
+ include Makefile.sources
+
+ AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa/ \
+ -I$(top_builddir)/src/compiler/glsl \
+ -I$(top_srcdir)/src/compiler/glsl \
+ -I$(top_srcdir)/src/compiler/glsl/glcpp \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gtest/include \
+ $(DEFINES)
+
+ AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ $(MSVC2013_COMPAT_CFLAGS)
+
+ AM_CXXFLAGS = \
+ $(VISIBILITY_CXXFLAGS) \
+ $(MSVC2013_COMPAT_CXXFLAGS)
+
+ noinst_LTLIBRARIES = libcompiler.la
+
+ libcompiler_la_SOURCES = $(LIBCOMPILER_FILES)
+
+ check_PROGRAMS =
+ TESTS =
+ BUILT_SOURCES =
+ CLEANFILES =
+ EXTRA_DIST = SConscript
+
+
+ EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \
+ glsl/TODO glsl/glcpp/README \
+ glsl/glsl_lexer.ll \
+ glsl/glsl_parser.yy \
+ glsl/glcpp/glcpp-lex.l \
+ glsl/glcpp/glcpp-parse.y \
+ glsl/Makefile.sources \
+ glsl/SConscript
+
+ TESTS += glsl/glcpp/tests/glcpp-test \
+ glsl/glcpp/tests/glcpp-test-cr-lf \
+ glsl/tests/blob-test \
+ glsl/tests/general-ir-test \
+ glsl/tests/optimization-test \
+ glsl/tests/sampler-types-test \
+ glsl/tests/uniform-initializer-test
+
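+ # Make the configured Python interpreter and its flags available to the
+ # test scripts run below.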
+ TESTS_ENVIRONMENT= \
+ export PYTHON2=$(PYTHON2); \
+ export PYTHON_FLAGS=$(PYTHON_FLAGS);
+
+ check_PROGRAMS += \
+ glsl/glcpp/glcpp \
+ glsl/glsl_test \
+ glsl/tests/blob-test \
+ glsl/tests/general-ir-test \
+ glsl/tests/sampler-types-test \
+ glsl/tests/uniform-initializer-test
+
++noinst_PROGRAMS = glsl_compiler spirv2nir
+
+ glsl_tests_blob_test_SOURCES = \
+ glsl/tests/blob_test.c
+ glsl_tests_blob_test_LDADD = \
+ glsl/libglsl.la
+
+ glsl_tests_general_ir_test_SOURCES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/tests/builtin_variable_test.cpp \
+ glsl/tests/invalidate_locations_test.cpp \
+ glsl/tests/general_ir_test.cpp \
+ glsl/tests/varyings_test.cpp
+ glsl_tests_general_ir_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ glsl_tests_general_ir_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ glsl_tests_uniform_initializer_test_SOURCES = \
+ glsl/tests/copy_constant_to_storage_tests.cpp \
+ glsl/tests/set_uniform_initializer_tests.cpp \
+ glsl/tests/uniform_initializer_utils.cpp \
+ glsl/tests/uniform_initializer_utils.h
+ glsl_tests_uniform_initializer_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ glsl_tests_uniform_initializer_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ glsl_tests_sampler_types_test_SOURCES = \
+ glsl/tests/sampler_types_test.cpp
+ glsl_tests_sampler_types_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ glsl_tests_sampler_types_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la
+
+ glsl_libglcpp_la_LIBADD = \
+ $(top_builddir)/src/util/libmesautil.la
+ glsl_libglcpp_la_SOURCES = \
+ glsl/glcpp/glcpp-lex.c \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-parse.h \
+ $(LIBGLCPP_FILES)
+
+ glsl_glcpp_glcpp_SOURCES = \
+ glsl/glcpp/glcpp.c
+ glsl_glcpp_glcpp_LDADD = \
+ glsl/libglcpp.la \
+ $(top_builddir)/src/libglsl_util.la \
+ -lm
+
+ glsl_libglsl_la_LIBADD = \
+ nir/libnir.la \
+ glsl/libglcpp.la
+
+ glsl_libglsl_la_SOURCES = \
+ glsl/glsl_lexer.cpp \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_parser.h \
+ $(LIBGLSL_FILES)
+
+
+ glsl_compiler_SOURCES = \
+ $(GLSL_COMPILER_CXX_FILES)
+
+ glsl_compiler_LDADD = \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+ glsl_glsl_test_SOURCES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/test.cpp \
+ glsl/test_optpass.cpp \
+ glsl/test_optpass.h
+
+ glsl_glsl_test_LDADD = \
+ glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
++spirv2nir_SOURCES = \
++ nir/spirv2nir.c
++
++spirv2nir_LDADD = \
++ nir/libnir.la \
++ $(top_builddir)/src/util/libmesautil.la \
++ -lm -lstdc++ \
++ $(PTHREAD_LIBS)
++
+ # We write our own rules for yacc and lex below. We'd rather use automake,
+ # but automake makes it especially difficult for a number of reasons:
+ #
+ # * < automake-1.12 generates .h files from .yy and .ypp files, but
+ # >=automake-1.12 generates .hh and .hpp files respectively. There's no
+ # good way of making a project that uses C++ yacc files compatible with
+ # both versions of automake. Strong work automake developers.
+ #
+ # * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
+ # we'd like the resulting generated code to also go in glcpp/ for purposes
+ # of distribution. Automake gives no way to do this.
+ #
+ # * Since we're building multiple yacc parsers into one library (and via one
+ # Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
+ # automake to name the resulting generated code as <library-name>_filename.c.
+ # Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
+
+ # In order to make build output print "LEX" and "YACC", we reproduce the
+ # automake variables below.
+
+ AM_V_LEX = $(am__v_LEX_$(V))
+ am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
+ am__v_LEX_0 = @echo " LEX " $@;
+ am__v_LEX_1 =
+
+ AM_V_YACC = $(am__v_YACC_$(V))
+ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
+ am__v_YACC_0 = @echo " YACC " $@;
+ am__v_YACC_1 =
+
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
+ glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
+
+ glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
+ $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
+
+ glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y
+
+ glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l
+
+ # Only the parsers (specifically the header files generated at the same time)
+ # need to be in BUILT_SOURCES. However, if we listed the parser headers, YACC
+ # would be invoked separately for the .c/.cpp file and for the .h file. By
+ # listing the .c/.cpp files instead, YACC is only executed once for each
+ # parser. The rest of the generated code
+ # will be created at the appropriate times according to standard automake
+ # dependency rules.
+ BUILT_SOURCES += \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_lexer.cpp \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-lex.c
+ CLEANFILES += \
+ glsl/glcpp/glcpp-parse.h \
+ glsl/glsl_parser.h \
+ glsl/glsl_parser.cpp \
+ glsl/glsl_lexer.cpp \
+ glsl/glcpp/glcpp-parse.c \
+ glsl/glcpp/glcpp-lex.c
+
+ clean-local:
+ $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
+
+ dist-hook:
+ $(RM) glsl/glcpp/tests/*.out
+ $(RM) glsl/glcpp/tests/subtest*/*.out
+
+ noinst_LTLIBRARIES += nir/libnir.la
+
+ nir_libnir_la_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+ nir_libnir_la_LIBADD = \
+ libcompiler.la
+
+ nir_libnir_la_SOURCES = \
+ $(NIR_FILES) \
++ $(SPIRV_FILES) \
+ $(NIR_GENERATED_FILES)
+
+ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
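+ # Each generator writes to stdout; if it fails, remove the partially written
+ # target and fail the rule so a truncated file is never treated as up to date.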
+ nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
+
+ nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
+
+ nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
+
+ nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
+
+ nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
+
+
+ check_PROGRAMS += nir/tests/control_flow_tests
+
+ nir_tests_control_flow_tests_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+ nir_tests_control_flow_tests_SOURCES = \
+ nir/tests/control_flow_tests.cpp
+ nir_tests_control_flow_tests_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ nir_tests_control_flow_tests_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+
+ TESTS += nir/tests/control_flow_tests
+
+
+ BUILT_SOURCES += $(NIR_GENERATED_FILES)
+ CLEANFILES += $(NIR_GENERATED_FILES)
+
+ EXTRA_DIST += \
+ nir/nir_algebraic.py \
+ nir/nir_builder_opcodes_h.py \
+ nir/nir_constant_expressions.py \
+ nir/nir_opcodes.py \
+ nir/nir_opcodes_c.py \
+ nir/nir_opcodes_h.py \
+ nir/nir_opt_algebraic.py \
+ nir/tests \
+ nir/Makefile.sources
--- /dev/null
+ LIBCOMPILER_FILES = \
+ builtin_type_macros.h \
+ glsl_types.cpp \
+ glsl_types.h \
+ nir_types.cpp \
+ nir_types.h \
+ shader_enums.c \
+ shader_enums.h
+
+ # libglsl
+
+ LIBGLSL_FILES = \
+ glsl/ast.h \
+ glsl/ast_array_index.cpp \
+ glsl/ast_expr.cpp \
+ glsl/ast_function.cpp \
+ glsl/ast_to_hir.cpp \
+ glsl/ast_type.cpp \
+ glsl/blob.c \
+ glsl/blob.h \
+ glsl/builtin_functions.cpp \
+ glsl/builtin_types.cpp \
+ glsl/builtin_variables.cpp \
+ glsl/glsl_parser_extras.cpp \
+ glsl/glsl_parser_extras.h \
+ glsl/glsl_symbol_table.cpp \
+ glsl/glsl_symbol_table.h \
+ glsl/hir_field_selection.cpp \
+ glsl/ir_basic_block.cpp \
+ glsl/ir_basic_block.h \
+ glsl/ir_builder.cpp \
+ glsl/ir_builder.h \
+ glsl/ir_clone.cpp \
+ glsl/ir_constant_expression.cpp \
+ glsl/ir.cpp \
+ glsl/ir.h \
+ glsl/ir_equals.cpp \
+ glsl/ir_expression_flattening.cpp \
+ glsl/ir_expression_flattening.h \
+ glsl/ir_function_can_inline.cpp \
+ glsl/ir_function_detect_recursion.cpp \
+ glsl/ir_function_inlining.h \
+ glsl/ir_function.cpp \
+ glsl/ir_hierarchical_visitor.cpp \
+ glsl/ir_hierarchical_visitor.h \
+ glsl/ir_hv_accept.cpp \
+ glsl/ir_import_prototypes.cpp \
+ glsl/ir_optimization.h \
+ glsl/ir_print_visitor.cpp \
+ glsl/ir_print_visitor.h \
+ glsl/ir_reader.cpp \
+ glsl/ir_reader.h \
+ glsl/ir_rvalue_visitor.cpp \
+ glsl/ir_rvalue_visitor.h \
+ glsl/ir_set_program_inouts.cpp \
+ glsl/ir_uniform.h \
+ glsl/ir_validate.cpp \
+ glsl/ir_variable_refcount.cpp \
+ glsl/ir_variable_refcount.h \
+ glsl/ir_visitor.h \
+ glsl/linker.cpp \
+ glsl/linker.h \
+ glsl/link_atomics.cpp \
+ glsl/link_functions.cpp \
+ glsl/link_interface_blocks.cpp \
+ glsl/link_uniforms.cpp \
+ glsl/link_uniform_initializers.cpp \
+ glsl/link_uniform_block_active_visitor.cpp \
+ glsl/link_uniform_block_active_visitor.h \
+ glsl/link_uniform_blocks.cpp \
+ glsl/link_varyings.cpp \
+ glsl/link_varyings.h \
+ glsl/list.h \
+ glsl/loop_analysis.cpp \
+ glsl/loop_analysis.h \
+ glsl/loop_controls.cpp \
+ glsl/loop_unroll.cpp \
+ glsl/lower_buffer_access.cpp \
+ glsl/lower_buffer_access.h \
+ glsl/lower_clip_distance.cpp \
+ glsl/lower_const_arrays_to_uniforms.cpp \
+ glsl/lower_discard.cpp \
+ glsl/lower_discard_flow.cpp \
+ glsl/lower_if_to_cond_assign.cpp \
+ glsl/lower_instructions.cpp \
+ glsl/lower_jumps.cpp \
+ glsl/lower_mat_op_to_vec.cpp \
+ glsl/lower_noise.cpp \
+ glsl/lower_offset_array.cpp \
+ glsl/lower_packed_varyings.cpp \
+ glsl/lower_named_interface_blocks.cpp \
+ glsl/lower_packing_builtins.cpp \
+ glsl/lower_subroutine.cpp \
+ glsl/lower_tess_level.cpp \
+ glsl/lower_texture_projection.cpp \
+ glsl/lower_variable_index_to_cond_assign.cpp \
+ glsl/lower_vec_index_to_cond_assign.cpp \
+ glsl/lower_vec_index_to_swizzle.cpp \
+ glsl/lower_vector.cpp \
+ glsl/lower_vector_derefs.cpp \
+ glsl/lower_vector_insert.cpp \
+ glsl/lower_vertex_id.cpp \
+ glsl/lower_output_reads.cpp \
+ glsl/lower_shared_reference.cpp \
+ glsl/lower_ubo_reference.cpp \
+ glsl/opt_algebraic.cpp \
+ glsl/opt_array_splitting.cpp \
+ glsl/opt_conditional_discard.cpp \
+ glsl/opt_constant_folding.cpp \
+ glsl/opt_constant_propagation.cpp \
+ glsl/opt_constant_variable.cpp \
+ glsl/opt_copy_propagation.cpp \
+ glsl/opt_copy_propagation_elements.cpp \
+ glsl/opt_dead_builtin_variables.cpp \
+ glsl/opt_dead_builtin_varyings.cpp \
+ glsl/opt_dead_code.cpp \
+ glsl/opt_dead_code_local.cpp \
+ glsl/opt_dead_functions.cpp \
+ glsl/opt_flatten_nested_if_blocks.cpp \
+ glsl/opt_flip_matrices.cpp \
+ glsl/opt_function_inlining.cpp \
+ glsl/opt_if_simplification.cpp \
+ glsl/opt_minmax.cpp \
+ glsl/opt_noop_swizzle.cpp \
+ glsl/opt_rebalance_tree.cpp \
+ glsl/opt_redundant_jumps.cpp \
+ glsl/opt_structure_splitting.cpp \
+ glsl/opt_swizzle_swizzle.cpp \
+ glsl/opt_tree_grafting.cpp \
+ glsl/opt_vectorize.cpp \
+ glsl/program.h \
+ glsl/s_expression.cpp \
+ glsl/s_expression.h
+
+ # glsl_compiler
+
+ GLSL_COMPILER_CXX_FILES = \
+ glsl/standalone_scaffolding.cpp \
+ glsl/standalone_scaffolding.h \
+ glsl/main.cpp
+
+ # libglsl generated sources
+ LIBGLSL_GENERATED_CXX_FILES = \
+ glsl/glsl_lexer.cpp \
+ glsl/glsl_parser.cpp
+
+ # libglcpp
+
+ LIBGLCPP_FILES = \
+ glsl/glcpp/glcpp.h \
+ glsl/glcpp/pp.c
+
+ LIBGLCPP_GENERATED_FILES = \
+ glsl/glcpp/glcpp-lex.c \
+ glsl/glcpp/glcpp-parse.c
+
+ NIR_GENERATED_FILES = \
+ nir/nir_builder_opcodes.h \
+ nir/nir_constant_expressions.c \
+ nir/nir_opcodes.c \
+ nir/nir_opcodes.h \
+ nir/nir_opt_algebraic.c
+
+ NIR_FILES = \
+ nir/glsl_to_nir.cpp \
+ nir/glsl_to_nir.h \
+ nir/nir.c \
+ nir/nir.h \
+ nir/nir_array.h \
+ nir/nir_builder.h \
+ nir/nir_clone.c \
+ nir/nir_constant_expressions.h \
+ nir/nir_control_flow.c \
+ nir/nir_control_flow.h \
+ nir/nir_control_flow_private.h \
+ nir/nir_dominance.c \
+ nir/nir_from_ssa.c \
++ nir/nir_gather_info.c \
+ nir/nir_gs_count_vertices.c \
++ nir/nir_inline_functions.c \
+ nir/nir_intrinsics.c \
+ nir/nir_intrinsics.h \
+ nir/nir_instr_set.c \
+ nir/nir_instr_set.h \
+ nir/nir_liveness.c \
+ nir/nir_lower_alu_to_scalar.c \
+ nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
+ nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
++ nir/nir_lower_indirect_derefs.c \
+ nir/nir_lower_load_const_to_scalar.c \
+ nir/nir_lower_locals_to_regs.c \
+ nir/nir_lower_idiv.c \
+ nir/nir_lower_io.c \
+ nir/nir_lower_outputs_to_temporaries.c \
+ nir/nir_lower_phis_to_scalar.c \
++ nir/nir_lower_returns.c \
+ nir/nir_lower_samplers.c \
+ nir/nir_lower_system_values.c \
+ nir/nir_lower_tex.c \
+ nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_vars_to_ssa.c \
+ nir/nir_lower_var_copies.c \
+ nir/nir_lower_vec_to_movs.c \
+ nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
+ nir/nir_normalize_cubemap_coords.c \
+ nir/nir_opt_constant_folding.c \
+ nir/nir_opt_copy_propagate.c \
+ nir/nir_opt_cse.c \
+ nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
+ nir/nir_opt_gcm.c \
+ nir/nir_opt_global_to_local.c \
+ nir/nir_opt_peephole_select.c \
+ nir/nir_opt_remove_phis.c \
+ nir/nir_opt_undef.c \
++ nir/nir_phi_builder.c \
++ nir/nir_phi_builder.h \
+ nir/nir_print.c \
+ nir/nir_remove_dead_variables.c \
++ nir/nir_repair_ssa.c \
+ nir/nir_search.c \
+ nir/nir_search.h \
+ nir/nir_split_var_copies.c \
+ nir/nir_sweep.c \
+ nir/nir_to_ssa.c \
+ nir/nir_validate.c \
+ nir/nir_vla.h \
+ nir/nir_worklist.c \
+ nir/nir_worklist.h
++
++SPIRV_FILES = \
++ nir/spirv/nir_spirv.h \
++ nir/spirv/spirv_to_nir.c \
++ nir/spirv/vtn_alu.c \
++ nir/spirv/vtn_cfg.c \
++ nir/spirv/vtn_glsl450.c \
++ nir/spirv/vtn_private.h \
++ nir/spirv/vtn_variables.c
--- /dev/null
+ glsl_compiler
+ glsl_lexer.cpp
+ glsl_parser.cpp
+ glsl_parser.h
+ glsl_parser.output
+ glsl_test
++spirv2nir
+ subtest-cr/
+ subtest-lf/
+ subtest-cr-lf/
+ subtest-lf-cr/
--- /dev/null
-noinst_PROGRAMS = glsl_compiler
+ # Copyright © 2012 Jon TURNEY
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+
+ AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa/ \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/glsl/glcpp \
+ -I$(top_srcdir)/src/gtest/include \
+ $(DEFINES)
+ AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ $(MSVC2013_COMPAT_CFLAGS)
+ AM_CXXFLAGS = \
+ $(VISIBILITY_CXXFLAGS) \
+ $(MSVC2013_COMPAT_CXXFLAGS)
+
+ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \
+ glsl_lexer.ll \
+ glsl_parser.yy \
+ glcpp/glcpp-lex.l \
+ glcpp/glcpp-parse.y \
+ SConscript
+
+ include Makefile.sources
+
+ TESTS = glcpp/tests/glcpp-test \
+ glcpp/tests/glcpp-test-cr-lf \
+ tests/blob-test \
+ tests/general-ir-test \
+ tests/optimization-test \
+ tests/sampler-types-test \
+ tests/uniform-initializer-test
+
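+ # Make the configured Python interpreter and its flags available to the
+ # test scripts run below.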
+ TESTS_ENVIRONMENT= \
+ export PYTHON2=$(PYTHON2); \
+ export PYTHON_FLAGS=$(PYTHON_FLAGS);
+
+ noinst_LTLIBRARIES = libglsl.la libglcpp.la
+ check_PROGRAMS = \
+ glcpp/glcpp \
+ glsl_test \
+ tests/blob-test \
+ tests/general-ir-test \
+ tests/sampler-types-test \
+ tests/uniform-initializer-test
+
-
++noinst_PROGRAMS = glsl_compiler spirv2nir
+
+ tests_blob_test_SOURCES = \
+ tests/blob_test.c
+ tests_blob_test_LDADD = \
+ $(top_builddir)/src/glsl/libglsl.la
+
+ tests_general_ir_test_SOURCES = \
+ standalone_scaffolding.cpp \
+ tests/builtin_variable_test.cpp \
+ tests/invalidate_locations_test.cpp \
+ tests/general_ir_test.cpp \
+ tests/varyings_test.cpp
+ tests_general_ir_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ tests_general_ir_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ tests_uniform_initializer_test_SOURCES = \
+ tests/copy_constant_to_storage_tests.cpp \
+ tests/set_uniform_initializer_tests.cpp \
+ tests/uniform_initializer_utils.cpp \
+ tests/uniform_initializer_utils.h
+ tests_uniform_initializer_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ tests_uniform_initializer_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ tests_sampler_types_test_SOURCES = \
+ tests/sampler_types_test.cpp
+ tests_sampler_types_test_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ tests_sampler_types_test_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ libglcpp_la_LIBADD = \
+ $(top_builddir)/src/util/libmesautil.la
+ libglcpp_la_SOURCES = \
+ glcpp/glcpp-lex.c \
+ glcpp/glcpp-parse.c \
+ glcpp/glcpp-parse.h \
+ $(LIBGLCPP_FILES)
+
+ glcpp_glcpp_SOURCES = \
+ glcpp/glcpp.c
+ glcpp_glcpp_LDADD = \
+ libglcpp.la \
+ $(top_builddir)/src/libglsl_util.la \
+ -lm
+
+ libglsl_la_LIBADD = \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ libglcpp.la
+
+ libglsl_la_SOURCES = \
+ glsl_lexer.cpp \
+ glsl_parser.cpp \
+ glsl_parser.h \
+ $(LIBGLSL_FILES)
+
+ glsl_compiler_SOURCES = \
+ $(GLSL_COMPILER_CXX_FILES)
+
+ glsl_compiler_LDADD = \
+ libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
++spirv2nir_SOURCES = \
++ standalone_scaffolding.cpp \
++ standalone_scaffolding.h \
++ nir/spirv2nir.c
++
++spirv2nir_LDADD = \
++ libglsl.la \
++ $(top_builddir)/src/libglsl_util.la \
++ $(PTHREAD_LIBS)
++
+ glsl_test_SOURCES = \
+ standalone_scaffolding.cpp \
+ test.cpp \
+ test_optpass.cpp \
+ test_optpass.h
+
+ glsl_test_LDADD = \
+ libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+ # We write our own rules for yacc and lex below. We'd rather use automake,
+ # but automake makes it especially difficult for a number of reasons:
+ #
+ # * < automake-1.12 generates .h files from .yy and .ypp files, but
+ # >=automake-1.12 generates .hh and .hpp files respectively. There's no
+ # good way of making a project that uses C++ yacc files compatible with
+ # both versions of automake. Strong work automake developers.
+ #
+ # * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
+ # we'd like the resulting generated code to also go in glcpp/ for purposes
+ # of distribution. Automake gives no way to do this.
+ #
+ # * Since we're building multiple yacc parsers into one library (and via one
+ # Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
+ # automake to name the resulting generated code as <library-name>_filename.c.
+ # Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
+
+ # In order to make build output print "LEX" and "YACC", we reproduce the
+ # automake variables below.
+
+ AM_V_LEX = $(am__v_LEX_$(V))
+ am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
+ am__v_LEX_0 = @echo " LEX " $@;
+ am__v_LEX_1 =
+
+ AM_V_YACC = $(am__v_YACC_$(V))
+ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
+ am__v_YACC_0 = @echo " YACC " $@;
+ am__v_YACC_1 =
+
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
+ glsl_parser.cpp glsl_parser.h: glsl_parser.yy
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
+
+ glsl_lexer.cpp: glsl_lexer.ll
+ $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
+
+ glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
+
+ glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
+
+ # Only the parsers (specifically the header files generated at the same time)
+ # need to be in BUILT_SOURCES. However, if we listed the parser headers, YACC
+ # would be invoked separately for the .c/.cpp file and for the .h file. By
+ # listing the .c/.cpp files instead, YACC is only executed once for each
+ # parser. The rest of the generated code
+ # will be created at the appropriate times according to standard automake
+ # dependency rules.
+ BUILT_SOURCES = \
+ glsl_parser.cpp \
+ glsl_lexer.cpp \
+ glcpp/glcpp-parse.c \
+ glcpp/glcpp-lex.c
+ CLEANFILES = \
+ glcpp/glcpp-parse.h \
+ glsl_parser.h \
+ $(BUILT_SOURCES)
+
+ clean-local:
+ $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
+
+ dist-hook:
+ $(RM) glcpp/tests/*.out
+ $(RM) glcpp/tests/subtest*/*.out
--- /dev/null
+ # shared source lists for Makefile, SConscript, and Android.mk
+
+ # libglcpp
+
+ LIBGLCPP_FILES = \
+ glcpp/glcpp.h \
+ glcpp/pp.c
+
+ LIBGLCPP_GENERATED_FILES = \
+ glcpp/glcpp-lex.c \
+ glcpp/glcpp-parse.c
+
+ NIR_GENERATED_FILES = \
+ nir/nir_builder_opcodes.h \
+ nir/nir_constant_expressions.c \
+ nir/nir_opcodes.c \
+ nir/nir_opcodes.h \
+ nir/nir_opt_algebraic.c
+
+ NIR_FILES = \
+ nir/nir.c \
+ nir/nir.h \
+ nir/nir_array.h \
+ nir/nir_builder.h \
+ nir/nir_clone.c \
+ nir/nir_constant_expressions.h \
+ nir/nir_control_flow.c \
+ nir/nir_control_flow.h \
+ nir/nir_control_flow_private.h \
+ nir/nir_dominance.c \
+ nir/nir_from_ssa.c \
++ nir/nir_gather_info.c \
+ nir/nir_gs_count_vertices.c \
++ nir/nir_inline_functions.c \
+ nir/nir_intrinsics.c \
+ nir/nir_intrinsics.h \
+ nir/nir_instr_set.c \
+ nir/nir_instr_set.h \
+ nir/nir_liveness.c \
+ nir/nir_lower_alu_to_scalar.c \
+ nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
++ nir/nir_lower_returns.c \
+ nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
++ nir/nir_lower_indirect_derefs.c \
+ nir/nir_lower_load_const_to_scalar.c \
+ nir/nir_lower_locals_to_regs.c \
+ nir/nir_lower_idiv.c \
+ nir/nir_lower_io.c \
+ nir/nir_lower_outputs_to_temporaries.c \
+ nir/nir_lower_phis_to_scalar.c \
+ nir/nir_lower_samplers.c \
+ nir/nir_lower_system_values.c \
+ nir/nir_lower_tex.c \
+ nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_vars_to_ssa.c \
+ nir/nir_lower_var_copies.c \
+ nir/nir_lower_vec_to_movs.c \
+ nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
+ nir/nir_normalize_cubemap_coords.c \
+ nir/nir_opt_constant_folding.c \
+ nir/nir_opt_copy_propagate.c \
+ nir/nir_opt_cse.c \
+ nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
+ nir/nir_opt_gcm.c \
+ nir/nir_opt_global_to_local.c \
+ nir/nir_opt_peephole_select.c \
+ nir/nir_opt_remove_phis.c \
+ nir/nir_opt_undef.c \
++ nir/nir_phi_builder.c \
++ nir/nir_phi_builder.h \
+ nir/nir_print.c \
+ nir/nir_remove_dead_variables.c \
++ nir/nir_repair_ssa.c \
+ nir/nir_search.c \
+ nir/nir_search.h \
+ nir/nir_split_var_copies.c \
+ nir/nir_sweep.c \
+ nir/nir_to_ssa.c \
+ nir/nir_validate.c \
+ nir/nir_vla.h \
+ nir/nir_worklist.c \
+ nir/nir_worklist.h
+
++SPIRV_FILES = \
++ nir/spirv/nir_spirv.h \
++ nir/spirv/spirv_to_nir.c \
++ nir/spirv/vtn_alu.c \
++ nir/spirv/vtn_cfg.c \
++ nir/spirv/vtn_glsl450.c \
++ nir/spirv/vtn_private.h \
++ nir/spirv/vtn_variables.c
++
+ # libglsl
+
+ LIBGLSL_FILES = \
+ ast.h \
+ ast_array_index.cpp \
+ ast_expr.cpp \
+ ast_function.cpp \
+ ast_to_hir.cpp \
+ ast_type.cpp \
+ blob.c \
+ blob.h \
+ builtin_functions.cpp \
+ builtin_types.cpp \
+ builtin_variables.cpp \
+ glsl_parser_extras.cpp \
+ glsl_parser_extras.h \
+ glsl_symbol_table.cpp \
+ glsl_symbol_table.h \
+ hir_field_selection.cpp \
+ ir_basic_block.cpp \
+ ir_basic_block.h \
+ ir_builder.cpp \
+ ir_builder.h \
+ ir_clone.cpp \
+ ir_constant_expression.cpp \
+ ir.cpp \
+ ir.h \
+ ir_equals.cpp \
+ ir_expression_flattening.cpp \
+ ir_expression_flattening.h \
+ ir_function_can_inline.cpp \
+ ir_function_detect_recursion.cpp \
+ ir_function_inlining.h \
+ ir_function.cpp \
+ ir_hierarchical_visitor.cpp \
+ ir_hierarchical_visitor.h \
+ ir_hv_accept.cpp \
+ ir_import_prototypes.cpp \
+ ir_optimization.h \
+ ir_print_visitor.cpp \
+ ir_print_visitor.h \
+ ir_reader.cpp \
+ ir_reader.h \
+ ir_rvalue_visitor.cpp \
+ ir_rvalue_visitor.h \
+ ir_set_program_inouts.cpp \
+ ir_uniform.h \
+ ir_validate.cpp \
+ ir_variable_refcount.cpp \
+ ir_variable_refcount.h \
+ ir_visitor.h \
+ linker.cpp \
+ linker.h \
+ link_atomics.cpp \
+ link_functions.cpp \
+ link_interface_blocks.cpp \
+ link_uniforms.cpp \
+ link_uniform_initializers.cpp \
+ link_uniform_block_active_visitor.cpp \
+ link_uniform_block_active_visitor.h \
+ link_uniform_blocks.cpp \
+ link_varyings.cpp \
+ link_varyings.h \
+ list.h \
+ loop_analysis.cpp \
+ loop_analysis.h \
+ loop_controls.cpp \
+ loop_unroll.cpp \
+ lower_buffer_access.cpp \
+ lower_buffer_access.h \
+ lower_clip_distance.cpp \
+ lower_const_arrays_to_uniforms.cpp \
+ lower_discard.cpp \
+ lower_discard_flow.cpp \
+ lower_if_to_cond_assign.cpp \
+ lower_instructions.cpp \
+ lower_jumps.cpp \
+ lower_mat_op_to_vec.cpp \
+ lower_noise.cpp \
+ lower_offset_array.cpp \
+ lower_packed_varyings.cpp \
+ lower_named_interface_blocks.cpp \
+ lower_packing_builtins.cpp \
+ lower_subroutine.cpp \
+ lower_tess_level.cpp \
+ lower_texture_projection.cpp \
+ lower_variable_index_to_cond_assign.cpp \
+ lower_vec_index_to_cond_assign.cpp \
+ lower_vec_index_to_swizzle.cpp \
+ lower_vector.cpp \
+ lower_vector_derefs.cpp \
+ lower_vector_insert.cpp \
+ lower_vertex_id.cpp \
+ lower_output_reads.cpp \
+ lower_shared_reference.cpp \
+ lower_ubo_reference.cpp \
+ opt_algebraic.cpp \
+ opt_array_splitting.cpp \
+ opt_conditional_discard.cpp \
+ opt_constant_folding.cpp \
+ opt_constant_propagation.cpp \
+ opt_constant_variable.cpp \
+ opt_copy_propagation.cpp \
+ opt_copy_propagation_elements.cpp \
+ opt_dead_builtin_variables.cpp \
+ opt_dead_builtin_varyings.cpp \
+ opt_dead_code.cpp \
+ opt_dead_code_local.cpp \
+ opt_dead_functions.cpp \
+ opt_flatten_nested_if_blocks.cpp \
+ opt_flip_matrices.cpp \
+ opt_function_inlining.cpp \
+ opt_if_simplification.cpp \
+ opt_minmax.cpp \
+ opt_noop_swizzle.cpp \
+ opt_rebalance_tree.cpp \
+ opt_redundant_jumps.cpp \
+ opt_structure_splitting.cpp \
+ opt_swizzle_swizzle.cpp \
+ opt_tree_grafting.cpp \
+ opt_vectorize.cpp \
+ program.h \
+ s_expression.cpp \
+ s_expression.h
+
+ # glsl to nir pass
+ GLSL_TO_NIR_FILES = \
+ nir/glsl_to_nir.cpp \
+ nir/glsl_to_nir.h
+
+ # glsl_compiler
+
+ GLSL_COMPILER_CXX_FILES = \
+ standalone_scaffolding.cpp \
+ standalone_scaffolding.h \
+ main.cpp
+
+ # libglsl generated sources
+ LIBGLSL_GENERATED_CXX_FILES = \
+ glsl_lexer.cpp \
+ glsl_parser.cpp
--- /dev/null
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file ast_to_hir.c
+ * Convert abstract syntax to high-level intermediate representation (HIR).
+ *
+ * During the conversion to HIR, the majority of the semantic checking is
+ * performed on the program. This includes:
+ *
+ * * Symbol table management
+ * * Type checking
+ * * Function binding
+ *
+ * The majority of this work could be done during parsing, and the parser could
+ * probably generate HIR directly. However, this results in frequent changes
+ * to the parser code. Since we do not assume that every system this compiler
+ * is built on will have Flex and Bison installed, we have to store the code
+ * generated by these tools in our version control system. In other parts of
+ * the system we've seen problems where a parser was changed but the generated
+ * code was not committed, merge conflicts were created because two developers
+ * had slightly different versions of Bison installed, etc.
+ *
+ * I have also noticed that running Bison generated parsers in GDB is very
+ * irritating. When you get a segfault on '$$ = $1->foo', you can't very
+ * well 'print $1' in GDB.
+ *
+ * As a result, my preference is to put as little C code as possible in the
+ * parser (and lexer) sources.
+ */
+
+ #include "glsl_symbol_table.h"
+ #include "glsl_parser_extras.h"
+ #include "ast.h"
+ #include "compiler/glsl_types.h"
+ #include "program/hash_table.h"
+ #include "main/shaderobj.h"
+ #include "ir.h"
+ #include "ir_builder.h"
+
+ using namespace ir_builder;
+
+ static void
+ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
+ exec_list *instructions);
+ static void
+ remove_per_vertex_blocks(exec_list *instructions,
+ _mesa_glsl_parse_state *state, ir_variable_mode mode);
+
+ /**
+ * Visitor class that finds the first instance of any write-only variable that
+ * is ever read, if any
+ */
+ class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ read_from_write_only_variable_visitor() : found(NULL)
+ {
+ }
+
+ virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ {
+ if (this->in_assignee)
+ return visit_continue;
+
+ ir_variable *var = ir->variable_referenced();
+ /* We can have image_write_only set on both images and buffer variables,
+ * but for images there is a distinction between reads of the variable
+ * itself (write_only) and reads of the memory it points to
+ * (image_write_only), while buffer variables make no such distinction.
+ * That is why this check is limited to
+ * buffer variables alone.
+ */
+ if (!var || var->data.mode != ir_var_shader_storage)
+ return visit_continue;
+
+ if (var->data.image_write_only) {
+ found = var;
+ return visit_stop;
+ }
+
+ return visit_continue;
+ }
+
+ ir_variable *get_variable() {
+ return found;
+ }
+
+ virtual ir_visitor_status visit_enter(ir_expression *ir)
+ {
+ /* .length() doesn't actually read anything */
+ if (ir->operation == ir_unop_ssbo_unsized_array_length)
+ return visit_continue_with_parent;
+
+ return visit_continue;
+ }
+
+ private:
+ ir_variable *found;
+ };
+
+ void
+ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+ {
+ _mesa_glsl_initialize_variables(instructions, state);
+
+ state->symbols->separate_function_namespace = state->language_version == 110;
+
+ state->current_function = NULL;
+
+ state->toplevel_ir = instructions;
+
+ state->gs_input_prim_type_specified = false;
+ state->tcs_output_vertices_specified = false;
+ state->cs_input_local_size_specified = false;
+
+ /* Section 4.2 of the GLSL 1.20 specification states:
+ * "The built-in functions are scoped in a scope outside the global scope
+ * users declare global variables in. That is, a shader's global scope,
+ * available for user-defined functions and global variables, is nested
+ * inside the scope containing the built-in functions."
+ *
+ * Since built-in functions like ftransform() access built-in variables,
+ * it follows that those must be in the outer scope as well.
+ *
+ * We push scope here to create this nesting effect...but don't pop.
+ * This way, a shader's globals are still in the symbol table for use
+ * by the linker.
+ */
+ state->symbols->push_scope();
+
+ foreach_list_typed (ast_node, ast, link, & state->translation_unit)
+ ast->hir(instructions, state);
+
+ detect_recursion_unlinked(state, instructions);
+ detect_conflicting_assignments(state, instructions);
+
+ state->toplevel_ir = NULL;
+
+ /* Move all of the variable declarations to the front of the IR list, and
+ * reverse the order. This has the (intended!) side effect that vertex
+ * shader inputs and fragment shader outputs will appear in the IR in the
+ * same order that they appeared in the shader code. This results in the
+ * locations being assigned in the declared order. Many (arguably buggy)
+ * applications depend on this behavior, and it matches what nearly all
+ * other drivers do.
+ */
+ foreach_in_list_safe(ir_instruction, node, instructions) {
+ ir_variable *const var = node->as_variable();
+
+ if (var == NULL)
+ continue;
+
+ var->remove();
+ instructions->push_head(var);
+ }
+
+ /* Figure out if gl_FragCoord is actually used in fragment shader */
+ ir_variable *const var = state->symbols->get_variable("gl_FragCoord");
+ if (var != NULL)
+ state->fs_uses_gl_fragcoord = var->data.used;
+
+ /* From section 7.1 (Built-In Language Variables) of the GLSL 4.10 spec:
+ *
+ * If multiple shaders using members of a built-in block belonging to
+ * the same interface are linked together in the same program, they
+ * must all redeclare the built-in block in the same way, as described
+ * in section 4.3.7 "Interface Blocks" for interface block matching, or
+ * a link error will result.
+ *
+ * The phrase "using members of a built-in block" implies that if two
+ * shaders are linked together and one of them *does not use* any members
+ * of the built-in block, then that shader does not need to have a matching
+ * redeclaration of the built-in block.
+ *
+ * This appears to be a clarification to the behaviour established for
+ * gl_PerVertex by GLSL 1.50, therefore implement it regardless of GLSL
+ * version.
+ *
+ * The definition of "interface" in section 4.3.7 that applies here is as
+ * follows:
+ *
+ * The boundary between adjacent programmable pipeline stages: This
+ * spans all the outputs in all compilation units of the first stage
+ * and all the inputs in all compilation units of the second stage.
+ *
+ * Therefore this rule applies to both inter- and intra-stage linking.
+ *
+ * The easiest way to implement this is to check whether the shader uses
+ * gl_PerVertex right after ast-to-ir conversion, and if it doesn't, simply
+ * remove all the relevant variable declarations from the IR, so that the
+ * linker won't see them and complain about mismatches.
+ */
+ remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
+ remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+ /* Check that we don't have reads from write-only variables */
+ read_from_write_only_variable_visitor v;
+ v.run(instructions);
+ ir_variable *error_var = v.get_variable();
+ if (error_var) {
+ /* It would be nice to have proper location information, but for that
+ * we would need to check this as we process each kind of AST node
+ */
+ YYLTYPE loc;
+ memset(&loc, 0, sizeof(loc));
+ _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+ error_var->name);
+ }
+ }
+
+
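+ /**
+  * Return the IR operation that implicitly converts a value of type \c from
+  * to type \c to, or (ir_expression_operation)0 when no such implicit
+  * conversion exists or the required GLSL version / extension is missing.
+  */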
+ static ir_expression_operation
+ get_conversion_operation(const glsl_type *to, const glsl_type *from,
+ struct _mesa_glsl_parse_state *state)
+ {
+ switch (to->base_type) {
+ case GLSL_TYPE_FLOAT:
+ switch (from->base_type) {
+ case GLSL_TYPE_INT: return ir_unop_i2f;
+ case GLSL_TYPE_UINT: return ir_unop_u2f;
+ case GLSL_TYPE_DOUBLE: return ir_unop_d2f;
+ default: return (ir_expression_operation)0;
+ }
+
+ case GLSL_TYPE_UINT:
+ if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable)
+ return (ir_expression_operation)0;
+ switch (from->base_type) {
+ case GLSL_TYPE_INT: return ir_unop_i2u;
+ default: return (ir_expression_operation)0;
+ }
+
+ case GLSL_TYPE_DOUBLE:
+ if (!state->has_double())
+ return (ir_expression_operation)0;
+ switch (from->base_type) {
+ case GLSL_TYPE_INT: return ir_unop_i2d;
+ case GLSL_TYPE_UINT: return ir_unop_u2d;
+ case GLSL_TYPE_FLOAT: return ir_unop_f2d;
+ default: return (ir_expression_operation)0;
+ }
+
+ default: return (ir_expression_operation)0;
+ }
+ }
+
+
+ /**
+ * If a conversion is available, convert one operand to a different type
+ *
+ * The \c from \c ir_rvalue is converted "in place".
+ *
+ * \param to Type that the operand is to be converted to
+ * \param from Operand that is being converted
+ * \param state GLSL compiler state
+ *
+ * \return
+ * If a conversion is possible (or unnecessary), \c true is returned.
+ * Otherwise \c false is returned.
+ */
+ bool
+ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ if (to->base_type == from->type->base_type)
+ return true;
+
+ /* Prior to GLSL 1.20, there are no implicit conversions */
+ if (!state->is_version(120, 0))
+ return false;
+
+ /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "There are no implicit array or structure conversions. For
+ * example, an array of int cannot be implicitly converted to an
+ * array of float.
+ */
+ if (!to->is_numeric() || !from->type->is_numeric())
+ return false;
+
+ /* We don't actually want the specific type `to`, we want a type
+ * with the same base type as `to`, but the same vector width as
+ * `from`.
+ */
+ to = glsl_type::get_instance(to->base_type, from->type->vector_elements,
+ from->type->matrix_columns);
+
+ ir_expression_operation op = get_conversion_operation(to, from->type, state);
+ if (op) {
+ from = new(ctx) ir_expression(op, to, from, NULL);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+
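+ /**
+  * Determine the result type of a binary arithmetic operator (+, -, *, /),
+  * converting the operands in place when an implicit conversion is required.
+  * Reports an error and returns glsl_type::error_type when the operand
+  * types cannot be combined.
+  */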
+ static const struct glsl_type *
+ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ bool multiply,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ /* From GLSL 1.50 spec, page 56:
+ *
+ * "The arithmetic binary operators add (+), subtract (-),
+ * multiply (*), and divide (/) operate on integer and
+ * floating-point scalars, vectors, and matrices."
+ */
+ if (!type_a->is_numeric() || !type_b->is_numeric()) {
+ _mesa_glsl_error(loc, state,
+ "operands to arithmetic operators must be numeric");
+ return glsl_type::error_type;
+ }
+
+
+ /* "If one operand is floating-point based and the other is
+ * not, then the conversions from Section 4.1.10 "Implicit
+ * Conversions" are applied to the non-floating-point-based operand."
+ */
+ if (!apply_implicit_conversion(type_a, value_b, state)
+ && !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "arithmetic operator");
+ return glsl_type::error_type;
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+
+ /* "If the operands are integer types, they must both be signed or
+ * both be unsigned."
+ *
+ * From this rule and the preceding conversion it can be inferred that
+ * both types must be GLSL_TYPE_FLOAT, or GLSL_TYPE_UINT, or GLSL_TYPE_INT.
+ * The is_numeric check above already filtered out the case where either
+ * type is not one of these, so now the base types need only be tested for
+ * equality.
+ */
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state,
+ "base type mismatch for arithmetic operator");
+ return glsl_type::error_type;
+ }
+
+ /* "All arithmetic binary operators result in the same fundamental type
+ * (signed integer, unsigned integer, or floating-point) as the
+ * operands they operate on, after operand type conversion. After
+ * conversion, the following cases are valid
+ *
+ * * The two operands are scalars. In this case the operation is
+ * applied, resulting in a scalar."
+ */
+ if (type_a->is_scalar() && type_b->is_scalar())
+ return type_a;
+
+ /* "* One operand is a scalar, and the other is a vector or matrix.
+ * In this case, the scalar operation is applied independently to each
+ * component of the vector or matrix, resulting in the same size
+ * vector or matrix."
+ */
+ if (type_a->is_scalar()) {
+ if (!type_b->is_scalar())
+ return type_b;
+ } else if (type_b->is_scalar()) {
+ return type_a;
+ }
+
+ /* All of the combinations of <scalar, scalar>, <vector, scalar>,
+ * <scalar, vector>, <scalar, matrix>, and <matrix, scalar> have been
+ * handled.
+ */
+ assert(!type_a->is_scalar());
+ assert(!type_b->is_scalar());
+
+ /* "* The two operands are vectors of the same size. In this case, the
+ * operation is done component-wise resulting in the same size
+ * vector."
+ */
+ if (type_a->is_vector() && type_b->is_vector()) {
+ if (type_a == type_b) {
+ return type_a;
+ } else {
+ _mesa_glsl_error(loc, state,
+ "vector size mismatch for arithmetic operator");
+ return glsl_type::error_type;
+ }
+ }
+
+ /* All of the combinations of <scalar, scalar>, <vector, scalar>,
+ * <scalar, vector>, <scalar, matrix>, <matrix, scalar>, and
+ * <vector, vector> have been handled. At least one of the operands must
+ * be matrix. Further, since there are no integer matrix types, the base
+ * type of both operands must be float.
+ */
+ assert(type_a->is_matrix() || type_b->is_matrix());
+ assert(type_a->base_type == GLSL_TYPE_FLOAT ||
+ type_a->base_type == GLSL_TYPE_DOUBLE);
+ assert(type_b->base_type == GLSL_TYPE_FLOAT ||
+ type_b->base_type == GLSL_TYPE_DOUBLE);
+
+ /* "* The operator is add (+), subtract (-), or divide (/), and the
+ * operands are matrices with the same number of rows and the same
+ * number of columns. In this case, the operation is done component-
+ * wise resulting in the same size matrix."
+ * * The operator is multiply (*), where both operands are matrices or
+ * one operand is a vector and the other a matrix. A right vector
+ * operand is treated as a column vector and a left vector operand as a
+ * row vector. In all these cases, it is required that the number of
+ * columns of the left operand is equal to the number of rows of the
+ * right operand. Then, the multiply (*) operation does a linear
+ * algebraic multiply, yielding an object that has the same number of
+ * rows as the left operand and the same number of columns as the right
+ * operand. Section 5.10 "Vector and Matrix Operations" explains in
+ * more detail how vectors and matrices are operated on."
+ */
+ if (! multiply) {
+ if (type_a == type_b)
+ return type_a;
+ } else {
+ const glsl_type *type = glsl_type::get_mul_type(type_a, type_b);
+
+ if (type == glsl_type::error_type) {
+ _mesa_glsl_error(loc, state,
+ "size mismatch for matrix multiplication");
+ }
+
+ return type;
+ }
+
+
+ /* "All other cases are illegal."
+ */
+ _mesa_glsl_error(loc, state, "type mismatch");
+ return glsl_type::error_type;
+ }
+
+
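+ /**
+  * Determine the result type of a unary arithmetic operator, which is simply
+  * the (numeric) operand type; reports an error and returns
+  * glsl_type::error_type for non-numeric operands.
+  */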
+ static const struct glsl_type *
+ unary_arithmetic_result_type(const struct glsl_type *type,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ /* From GLSL 1.50 spec, page 57:
+ *
+ * "The arithmetic unary operators negate (-), post- and pre-increment
+ * and decrement (-- and ++) operate on integer or floating-point
+ * values (including vectors and matrices). All unary operators work
+ * component-wise on their operands. These result with the same type
+ * they operated on."
+ */
+ if (!type->is_numeric()) {
+ _mesa_glsl_error(loc, state,
+ "operands to arithmetic operators must be numeric");
+ return glsl_type::error_type;
+ }
+
+ return type;
+ }
+
+ /**
+ * \brief Return the result type of a bit-logic operation.
+ *
+ * If the given types to the bit-logic operator are invalid, return
+ * glsl_type::error_type.
+ *
+ * \param value_a LHS of bit-logic op
+ * \param value_b RHS of bit-logic op
+ */
+ static const struct glsl_type *
+ bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ ast_operators op,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ if (!state->check_bitwise_operations_allowed(loc)) {
+ return glsl_type::error_type;
+ }
+
+ /* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
+ *
+ * "The bitwise operators and (&), exclusive-or (^), and inclusive-or
+ * (|). The operands must be of type signed or unsigned integers or
+ * integer vectors."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't
+ * make sense for bitwise operations, as they don't operate on floats.
+ *
+ * GLSL 4.0 added implicit int -> uint conversions, which are relevant
+ * here. It wasn't clear whether or not we should apply them to bitwise
+ * operations. However, Khronos has decided that they should in future
+ * language revisions. Applications also rely on this behavior. We opt
+ * to apply them in general, but issue a portability warning.
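+ * For example, with GLSL 4.00, "u & i" where "u" is uint and "i" is int
+ * converts the int operand to uint and proceeds, with a portability warning.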
+ *
+ * See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405
+ */
+ if (type_a->base_type != type_b->base_type) {
+ if (!apply_implicit_conversion(type_a, value_b, state)
+ && !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "`%s` operator",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ } else {
+ _mesa_glsl_warning(loc, state,
+ "some implementations may not support implicit "
+ "int -> uint conversions for `%s' operators; "
+ "consider casting explicitly for portability",
+ ast_expression::operator_string(op));
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+ }
+
+ /* "The fundamental types of the operands (signed or unsigned) must
+ * match,"
+ */
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state, "operands of `%s' must have the same "
+ "base type", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "The operands cannot be vectors of differing size." */
+ if (type_a->is_vector() &&
+ type_b->is_vector() &&
+ type_a->vector_elements != type_b->vector_elements) {
+ _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
+ "different sizes", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "If one operand is a scalar and the other a vector, the scalar is
+ * applied component-wise to the vector, resulting in the same type as
+ * the vector. The fundamental types of the operands [...] will be the
+ * resulting fundamental type."
+ */
+ if (type_a->is_scalar())
+ return type_b;
+ else
+ return type_a;
+ }
+
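+ /**
+  * Determine the result type of a modulus (%) operation, checking that both
+  * operands are integers and applying implicit conversions where the GLSL
+  * version permits them.  Returns glsl_type::error_type on mismatch.
+  */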
+ static const struct glsl_type *
+ modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) {
+ return glsl_type::error_type;
+ }
+
+ /* Section 5.9 (Expressions) of the GLSL 4.00 specification says:
+ *
+ * "The operator modulus (%) operates on signed or unsigned integers or
+ * integer vectors."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer");
+ return glsl_type::error_type;
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer");
+ return glsl_type::error_type;
+ }
+
+ /* "If the fundamental types in the operands do not match, then the
+ * conversions from section 4.1.10 "Implicit Conversions" are applied
+ * to create matching types."
+ *
+ * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit
+ * int -> uint conversion rules. Prior to that, there were no implicit
+ * conversions. So it's harmless to apply them universally - no implicit
+ * conversions will exist. If the types don't match, we'll receive false,
+ * and raise an error, satisfying the GLSL 1.50 spec, page 56:
+ *
+ * "The operand types must both be signed or unsigned."
+ */
+ if (!apply_implicit_conversion(type_a, value_b, state) &&
+ !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "modulus (%%) operator");
+ return glsl_type::error_type;
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+
+ /* "The operands cannot be vectors of differing size. If one operand is
+ * a scalar and the other vector, then the scalar is applied component-
+ * wise to the vector, resulting in the same type as the vector. If both
+ * are vectors of the same size, the result is computed component-wise."
+ */
+ if (type_a->is_vector()) {
+ if (!type_b->is_vector()
+ || (type_a->vector_elements == type_b->vector_elements))
+ return type_a;
+ } else
+ return type_b;
+
+ /* "The operator modulus (%) is not defined for any other data types
+ * (non-integer types)."
+ */
+ _mesa_glsl_error(loc, state, "type mismatch");
+ return glsl_type::error_type;
+ }
+
+
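+ /**
+  * Determine the result type of a relational operator (<, >, <=, >=), which
+  * is always scalar Boolean; reports an error and returns
+  * glsl_type::error_type unless both operands are numeric scalars of the
+  * same (possibly implicitly converted) base type.
+  */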
+ static const struct glsl_type *
+ relational_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ const glsl_type *type_a = value_a->type;
+ const glsl_type *type_b = value_b->type;
+
+ /* From GLSL 1.50 spec, page 56:
+ * "The relational operators greater than (>), less than (<), greater
+ * than or equal (>=), and less than or equal (<=) operate only on
+ * scalar integer and scalar floating-point expressions."
+ */
+ if (!type_a->is_numeric()
+ || !type_b->is_numeric()
+ || !type_a->is_scalar()
+ || !type_b->is_scalar()) {
+ _mesa_glsl_error(loc, state,
+ "operands to relational operators must be scalar and "
+ "numeric");
+ return glsl_type::error_type;
+ }
+
+ /* "Either the operands' types must match, or the conversions from
+ * Section 4.1.10 "Implicit Conversions" will be applied to the integer
+ * operand, after which the types must match."
+ */
+ if (!apply_implicit_conversion(type_a, value_b, state)
+ && !apply_implicit_conversion(type_b, value_a, state)) {
+ _mesa_glsl_error(loc, state,
+ "could not implicitly convert operands to "
+ "relational operator");
+ return glsl_type::error_type;
+ }
+ type_a = value_a->type;
+ type_b = value_b->type;
+
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state, "base type mismatch");
+ return glsl_type::error_type;
+ }
+
+ /* "The result is scalar Boolean."
+ */
+ return glsl_type::bool_type;
+ }
+
+ /**
+ * \brief Return the result type of a bit-shift operation.
+ *
+ * If the given types to the bit-shift operator are invalid, return
+ * glsl_type::error_type.
+ *
+ * \param type_a Type of LHS of bit-shift op
+ * \param type_b Type of RHS of bit-shift op
+ */
+ static const struct glsl_type *
+ shift_result_type(const struct glsl_type *type_a,
+ const struct glsl_type *type_b,
+ ast_operators op,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ if (!state->check_bitwise_operations_allowed(loc)) {
+ return glsl_type::error_type;
+ }
+
+ /* From page 50 (page 56 of the PDF) of the GLSL 1.30 spec:
+ *
+ * "The shift operators (<<) and (>>). For both operators, the operands
+ * must be signed or unsigned integers or integer vectors. One operand
+ * can be signed while the other is unsigned."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or "
+ "integer vector", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of operator %s must be an integer or "
+ "integer vector", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "If the first operand is a scalar, the second operand has to be
+ * a scalar as well."
+ */
+ if (type_a->is_scalar() && !type_b->is_scalar()) {
+ _mesa_glsl_error(loc, state, "if the first operand of %s is scalar, the "
+ "second must be scalar as well",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* If both operands are vectors, check that they have the same number of
+ * elements.
+ */
+ if (type_a->is_vector() &&
+ type_b->is_vector() &&
+ type_a->vector_elements != type_b->vector_elements) {
+ _mesa_glsl_error(loc, state, "vector operands to operator %s must "
+ "have same number of elements",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
+
+ /* "In all cases, the resulting type will be the same type as the left
+ * operand."
+ */
+ return type_a;
+ }
+
+ /**
+ * Returns the innermost array index expression in an rvalue tree.
+ * This is the largest indexing level -- if an array of blocks, then
+ * it is the block index rather than an indexing expression for an
+ * array-typed member of an array of blocks.
+ */
+ static ir_rvalue *
+ find_innermost_array_index(ir_rvalue *rv)
+ {
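+ /* For example (illustrative), given an l-value written as
+ * blocks[i].member[j], this returns the block index expression `i',
+ * not the member index `j'.
+ */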
+ ir_dereference_array *last = NULL;
+ while (rv) {
+ if (rv->as_dereference_array()) {
+ last = rv->as_dereference_array();
+ rv = last->array;
+ } else if (rv->as_dereference_record())
+ rv = rv->as_dereference_record()->record;
+ else if (rv->as_swizzle())
+ rv = rv->as_swizzle()->val;
+ else
+ rv = NULL;
+ }
+
+ if (last)
+ return last->array_index;
+
+ return NULL;
+ }
+
+ /**
+ * Validates that a value can be assigned to a location with a specified type
+ *
+ * Validates that \c rhs can be assigned to some location. If the types are
+ * not an exact match but an automatic conversion is possible, \c rhs will be
+ * converted.
+ *
+ * \return
+ * \c NULL if \c rhs cannot be assigned to a location with type \c lhs_type.
+ * Otherwise the actual RHS to be assigned will be returned. This may be
+ * \c rhs, or it may be \c rhs after some type conversion.
+ *
+ * \note
+ * In addition to being used for assignments, this function is used to
+ * type-check return values.
+ */
+ static ir_rvalue *
+ validate_assignment(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_rvalue *lhs,
+ ir_rvalue *rhs, bool is_initializer)
+ {
+ /* If there is already some error in the RHS, just return it. Anything
+ * else will lead to an avalanche of error messages back to the user.
+ */
+ if (rhs->type->is_error())
+ return rhs;
+
+ /* In the Tessellation Control Shader:
+ * If a per-vertex output variable is used as an l-value, it is an error
+ * if the expression indicating the vertex number is not the identifier
+ * `gl_InvocationID`.
+ */
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ ir_variable *var = lhs->variable_referenced();
+ if (var->data.mode == ir_var_shader_out && !var->data.patch) {
+ ir_rvalue *index = find_innermost_array_index(lhs);
+ ir_variable *index_var = index ? index->variable_referenced() : NULL;
+ if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) {
+ _mesa_glsl_error(&loc, state,
+ "Tessellation control shader outputs can only "
+ "be indexed by gl_InvocationID");
+ return NULL;
+ }
+ }
+ }
+
+ /* If the types are identical, the assignment can trivially proceed.
+ */
+ if (rhs->type == lhs->type)
+ return rhs;
+
+ /* If the array element types are the same and the LHS is unsized,
+ * the assignment is okay for initializers embedded in variable
+ * declarations.
+ *
+ * Note: Whole-array assignments are not permitted in GLSL 1.10, but this
+ * is handled by ir_dereference::is_lvalue.
+ */
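+ /* For example (illustrative, GLSL 1.20+ array constructors):
+ * "float a[] = float[](1.0, 2.0);" is accepted here because the unsized
+ * LHS takes its size from the initializer, while the same assignment
+ * outside a declaration hits the error below.
+ */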
+ const glsl_type *lhs_t = lhs->type;
+ const glsl_type *rhs_t = rhs->type;
+ bool unsized_array = false;
+ while(lhs_t->is_array()) {
+ if (rhs_t == lhs_t)
+ break; /* the rest of the inner arrays match so break out early */
+ if (!rhs_t->is_array()) {
+ unsized_array = false;
+ break; /* number of dimensions mismatch */
+ }
+ if (lhs_t->length == rhs_t->length) {
+ lhs_t = lhs_t->fields.array;
+ rhs_t = rhs_t->fields.array;
+ continue;
+ } else if (lhs_t->is_unsized_array()) {
+ unsized_array = true;
+ } else {
+ unsized_array = false;
+ break; /* sized array mismatch */
+ }
+ lhs_t = lhs_t->fields.array;
+ rhs_t = rhs_t->fields.array;
+ }
+ if (unsized_array) {
+ if (is_initializer) {
+ return rhs;
+ } else {
+ _mesa_glsl_error(&loc, state,
+ "implicitly sized arrays cannot be assigned");
+ return NULL;
+ }
+ }
+
+ /* Check for implicit conversion in GLSL 1.20 */
+ if (apply_implicit_conversion(lhs->type, rhs, state)) {
+ if (rhs->type == lhs->type)
+ return rhs;
+ }
+
+ _mesa_glsl_error(&loc, state,
+ "%s of type %s cannot be assigned to "
+ "variable of type %s",
+ is_initializer ? "initializer" : "value",
+ rhs->type->name, lhs->type->name);
+
+ return NULL;
+ }
+
+ static void
+ mark_whole_array_access(ir_rvalue *access)
+ {
+ ir_dereference_variable *deref = access->as_dereference_variable();
+
+ if (deref && deref->var) {
+ deref->var->data.max_array_access = deref->type->length - 1;
+ }
+ }
+
+ static bool
+ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
+ const char *non_lvalue_description,
+ ir_rvalue *lhs, ir_rvalue *rhs,
+ ir_rvalue **out_rvalue, bool needs_rvalue,
+ bool is_initializer,
+ YYLTYPE lhs_loc)
+ {
+ void *ctx = state;
+ bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
+
+ ir_variable *lhs_var = lhs->variable_referenced();
+ if (lhs_var)
+ lhs_var->data.assigned = true;
+
+ if (!error_emitted) {
+ if (non_lvalue_description != NULL) {
+ _mesa_glsl_error(&lhs_loc, state,
+ "assignment to %s",
+ non_lvalue_description);
+ error_emitted = true;
+ } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+ (lhs_var->data.mode == ir_var_shader_storage &&
+ lhs_var->data.image_read_only))) {
+ /* We can have image_read_only set on both images and buffer variables,
+ * but in the former there is a distinction between assignments to
+ * the variable itself (read_only) and to the memory it points to
+ * (image_read_only), while for buffer variables there is no such
+ * distinction. That is why this check is limited to buffer variables
+ * alone.
+ */
+ _mesa_glsl_error(&lhs_loc, state,
+ "assignment to read-only variable '%s'",
+ lhs_var->name);
+ error_emitted = true;
+ } else if (lhs->type->is_array() &&
+ !state->check_version(120, 300, &lhs_loc,
+ "whole array assignment forbidden")) {
+ /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "Other binary or unary expressions, non-dereferenced
+ * arrays, function names, swizzles with repeated fields,
+ * and constants cannot be l-values."
+ *
+ * The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00.
+ */
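+ /* e.g. (illustrative) "float a[2], b[2]; a = b;" is rejected in
+ * GLSL 1.10 but accepted from GLSL 1.20 / GLSL ES 3.00 on.
+ */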
+ error_emitted = true;
+ } else if (!lhs->is_lvalue()) {
+ _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment");
+ error_emitted = true;
+ }
+ }
+
+ ir_rvalue *new_rhs =
+ validate_assignment(state, lhs_loc, lhs, rhs, is_initializer);
+ if (new_rhs != NULL) {
+ rhs = new_rhs;
+
+ /* If the LHS array was not declared with a size, it takes its size from
+ * the RHS. If the LHS is an l-value and a whole array, it must be a
+ * dereference of a variable. Any other case would require that the LHS
+ * is either not an l-value or not a whole array.
+ */
+ if (lhs->type->is_unsized_array()) {
+ ir_dereference *const d = lhs->as_dereference();
+
+ assert(d != NULL);
+
+ ir_variable *const var = d->variable_referenced();
+
+ assert(var != NULL);
+
+ if (var->data.max_array_access >= unsigned(rhs->type->array_size())) {
+ /* FINISHME: This should actually log the location of the RHS. */
+ _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to "
+ "previous access",
+ var->data.max_array_access);
+ }
+
+ var->type = glsl_type::get_array_instance(lhs->type->fields.array,
+ rhs->type->array_size());
+ d->type = var->type;
+ }
+ if (lhs->type->is_array()) {
+ mark_whole_array_access(rhs);
+ mark_whole_array_access(lhs);
+ }
+ }
+
+ /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
+ * but not post_inc) need the converted assigned value as an rvalue
+ * to handle things like:
+ *
+ * i = j += 1;
+ */
+ if (needs_rvalue) {
+ ir_variable *var = new(ctx) ir_variable(rhs->type, "assignment_tmp",
+ ir_var_temporary);
+ instructions->push_tail(var);
+ instructions->push_tail(assign(var, rhs));
+
+ if (!error_emitted) {
+ ir_dereference_variable *deref_var = new(ctx) ir_dereference_variable(var);
+ instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var));
+ }
+ ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
+
+ *out_rvalue = rvalue;
+ } else {
+ if (!error_emitted)
+ instructions->push_tail(new(ctx) ir_assignment(lhs, rhs));
+ *out_rvalue = NULL;
+ }
+
+ return error_emitted;
+ }
+
+ static ir_rvalue *
+ get_lvalue_copy(exec_list *instructions, ir_rvalue *lvalue)
+ {
+ void *ctx = ralloc_parent(lvalue);
+ ir_variable *var;
+
+ var = new(ctx) ir_variable(lvalue->type, "_post_incdec_tmp",
+ ir_var_temporary);
+ instructions->push_tail(var);
+
+ instructions->push_tail(new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var),
+ lvalue));
+
+ return new(ctx) ir_dereference_variable(var);
+ }
+
+
+ ir_rvalue *
+ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+ {
+ (void) instructions;
+ (void) state;
+
+ return NULL;
+ }
+
+ bool
+ ast_node::has_sequence_subexpression() const
+ {
+ return false;
+ }
+
+ void
+ ast_function_expression::hir_no_rvalue(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ (void)hir(instructions, state);
+ }
+
+ void
+ ast_aggregate_initializer::hir_no_rvalue(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ (void)hir(instructions, state);
+ }
+
+ static ir_rvalue *
+ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
+ {
+ int join_op;
+ ir_rvalue *cmp = NULL;
+
+ if (operation == ir_binop_all_equal)
+ join_op = ir_binop_logic_and;
+ else
+ join_op = ir_binop_logic_or;
+
+ switch (op0->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_DOUBLE:
+ return new(mem_ctx) ir_expression(operation, op0, op1);
+
+ case GLSL_TYPE_ARRAY: {
+ for (unsigned int i = 0; i < op0->type->length; i++) {
+ ir_rvalue *e0, *e1, *result;
+
+ e0 = new(mem_ctx) ir_dereference_array(op0->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ e1 = new(mem_ctx) ir_dereference_array(op1->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ result = do_comparison(mem_ctx, operation, e0, e1);
+
+ if (cmp) {
+ cmp = new(mem_ctx) ir_expression(join_op, cmp, result);
+ } else {
+ cmp = result;
+ }
+ }
+
+ mark_whole_array_access(op0);
+ mark_whole_array_access(op1);
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ for (unsigned int i = 0; i < op0->type->length; i++) {
+ ir_rvalue *e0, *e1, *result;
+ const char *field_name = op0->type->fields.structure[i].name;
+
+ e0 = new(mem_ctx) ir_dereference_record(op0->clone(mem_ctx, NULL),
+ field_name);
+ e1 = new(mem_ctx) ir_dereference_record(op1->clone(mem_ctx, NULL),
+ field_name);
+ result = do_comparison(mem_ctx, operation, e0, e1);
+
+ if (cmp) {
+ cmp = new(mem_ctx) ir_expression(join_op, cmp, result);
+ } else {
+ cmp = result;
+ }
+ }
+ break;
+ }
+
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_INTERFACE:
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_SUBROUTINE:
+ /* I assume a comparison of a struct containing a sampler just
+ * ignores the sampler present in the type.
+ */
+ break;
+ }
+
+ if (cmp == NULL)
+ cmp = new(mem_ctx) ir_constant(true);
+
+ return cmp;
+ }
+
+ /* For logical operations, we want to ensure that the operands are
+ * scalar booleans. If they aren't, emit an error and return a constant
+ * boolean to avoid triggering cascading error messages.
+ */
+ ir_rvalue *
+ get_scalar_boolean_operand(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ ast_expression *parent_expr,
+ int operand,
+ const char *operand_name,
+ bool *error_emitted)
+ {
+ ast_expression *expr = parent_expr->subexpressions[operand];
+ void *ctx = state;
+ ir_rvalue *val = expr->hir(instructions, state);
+
+ if (val->type->is_boolean() && val->type->is_scalar())
+ return val;
+
+ if (!*error_emitted) {
+ YYLTYPE loc = expr->get_location();
+ _mesa_glsl_error(&loc, state, "%s of `%s' must be scalar boolean",
+ operand_name,
+ parent_expr->operator_string(parent_expr->oper));
+ *error_emitted = true;
+ }
+
+ return new(ctx) ir_constant(true);
+ }
+
+ /**
+ * If name refers to a builtin array whose maximum allowed size is less than
+ * size, report an error. Otherwise do nothing.
+ */
+ void
+ check_builtin_array_max_size(const char *name, unsigned size,
+ YYLTYPE loc, struct _mesa_glsl_parse_state *state)
+ {
+ if ((strcmp("gl_TexCoord", name) == 0)
+ && (size > state->Const.MaxTextureCoords)) {
+ /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "The size [of gl_TexCoord] can be at most
+ * gl_MaxTextureCoords."
+ */
+ _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot "
+ "be larger than gl_MaxTextureCoords (%u)",
+ state->Const.MaxTextureCoords);
+ } else if (strcmp("gl_ClipDistance", name) == 0
+ && size > state->Const.MaxClipPlanes) {
+ /* From section 7.1 (Vertex Shader Special Variables) of the
+ * GLSL 1.30 spec:
+ *
+ * "The gl_ClipDistance array is predeclared as unsized and
+ * must be sized by the shader either redeclaring it with a
+ * size or indexing it only with integral constant
+ * expressions. ... The size can be at most
+ * gl_MaxClipDistances."
+ */
+ _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size cannot "
+ "be larger than gl_MaxClipDistances (%u)",
+ state->Const.MaxClipPlanes);
+ }
+ }
+
+ /**
+ * Create the constant 1, of a type which is appropriate for incrementing and
+ * decrementing values of the given GLSL type. For example, if type is vec4,
+ * this creates a constant value of 1.0 having type float.
+ *
+ * If the given type is invalid for increment and decrement operators, return
+ * a floating point 1--the error will be detected later.
+ */
+ static ir_rvalue *
+ constant_one_for_inc_dec(void *ctx, const glsl_type *type)
+ {
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ return new(ctx) ir_constant((unsigned) 1);
+ case GLSL_TYPE_INT:
+ return new(ctx) ir_constant(1);
+ default:
+ case GLSL_TYPE_FLOAT:
+ return new(ctx) ir_constant(1.0f);
+ }
+ }
+
+ ir_rvalue *
+ ast_expression::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ return do_hir(instructions, state, true);
+ }
+
+ void
+ ast_expression::hir_no_rvalue(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ do_hir(instructions, state, false);
+ }
+
+ ir_rvalue *
+ ast_expression::do_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ bool needs_rvalue)
+ {
+ void *ctx = state;
+ static const int operations[AST_NUM_OPERATORS] = {
+ -1, /* ast_assign doesn't convert to ir_expression. */
+ -1, /* ast_plus doesn't convert to ir_expression. */
+ ir_unop_neg,
+ ir_binop_add,
+ ir_binop_sub,
+ ir_binop_mul,
+ ir_binop_div,
+ ir_binop_mod,
+ ir_binop_lshift,
+ ir_binop_rshift,
+ ir_binop_less,
+ ir_binop_greater,
+ ir_binop_lequal,
+ ir_binop_gequal,
+ ir_binop_all_equal,
+ ir_binop_any_nequal,
+ ir_binop_bit_and,
+ ir_binop_bit_xor,
+ ir_binop_bit_or,
+ ir_unop_bit_not,
+ ir_binop_logic_and,
+ ir_binop_logic_xor,
+ ir_binop_logic_or,
+ ir_unop_logic_not,
+
+ /* Note: The following block of expression types actually convert
+ * to multiple IR instructions.
+ */
+ ir_binop_mul, /* ast_mul_assign */
+ ir_binop_div, /* ast_div_assign */
+ ir_binop_mod, /* ast_mod_assign */
+ ir_binop_add, /* ast_add_assign */
+ ir_binop_sub, /* ast_sub_assign */
+ ir_binop_lshift, /* ast_ls_assign */
+ ir_binop_rshift, /* ast_rs_assign */
+ ir_binop_bit_and, /* ast_and_assign */
+ ir_binop_bit_xor, /* ast_xor_assign */
+ ir_binop_bit_or, /* ast_or_assign */
+
+ -1, /* ast_conditional doesn't convert to ir_expression. */
+ ir_binop_add, /* ast_pre_inc. */
+ ir_binop_sub, /* ast_pre_dec. */
+ ir_binop_add, /* ast_post_inc. */
+ ir_binop_sub, /* ast_post_dec. */
+ -1, /* ast_field_selection doesn't conv to ir_expression. */
+ -1, /* ast_array_index doesn't convert to ir_expression. */
+ -1, /* ast_function_call doesn't conv to ir_expression. */
+ -1, /* ast_identifier doesn't convert to ir_expression. */
+ -1, /* ast_int_constant doesn't convert to ir_expression. */
+ -1, /* ast_uint_constant doesn't conv to ir_expression. */
+ -1, /* ast_float_constant doesn't conv to ir_expression. */
+ -1, /* ast_bool_constant doesn't conv to ir_expression. */
+ -1, /* ast_sequence doesn't convert to ir_expression. */
+ };
+ ir_rvalue *result = NULL;
+ ir_rvalue *op[3];
+ const struct glsl_type *type; /* a temporary variable for switch cases */
+ bool error_emitted = false;
+ YYLTYPE loc;
+
+ loc = this->get_location();
+
+ switch (this->oper) {
+ case ast_aggregate:
+ assert(!"ast_aggregate: Should never get here.");
+ break;
+
+ case ast_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0], op[1], &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_plus:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+
+ type = unary_arithmetic_result_type(op[0]->type, state, & loc);
+
+ error_emitted = type->is_error();
+
+ result = op[0];
+ break;
+
+ case ast_neg:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+
+ type = unary_arithmetic_result_type(op[0]->type, state, & loc);
+
+ error_emitted = type->is_error();
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], NULL);
+ break;
+
+ case ast_add:
+ case ast_sub:
+ case ast_mul:
+ case ast_div:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = arithmetic_result_type(op[0], op[1],
+ (this->oper == ast_mul),
+ state, & loc);
+ error_emitted = type->is_error();
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ break;
+
+ case ast_mod:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = modulus_result_type(op[0], op[1], state, &loc);
+
+ assert(operations[this->oper] == ir_binop_mod);
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = type->is_error();
+ break;
+
+ case ast_lshift:
+ case ast_rshift:
+ if (!state->check_bitwise_operations_allowed(&loc)) {
+ error_emitted = true;
+ }
+
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
+ &loc);
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
+ break;
+
+ case ast_less:
+ case ast_greater:
+ case ast_lequal:
+ case ast_gequal:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = relational_result_type(op[0], op[1], state, & loc);
+
+ /* The relational operators must either generate an error or result
+ * in a scalar boolean. See page 57 of the GLSL 1.50 spec.
+ */
+ assert(type->is_error()
+ || ((type->base_type == GLSL_TYPE_BOOL)
+ && type->is_scalar()));
+
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = type->is_error();
+ break;
+
+ case ast_nequal:
+ case ast_equal:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ /* From page 58 (page 64 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The equality operators equal (==), and not equal (!=)
+ * operate on all types. They result in a scalar Boolean. If
+ * the operand types do not match, then there must be a
+ * conversion from Section 4.1.10 "Implicit Conversions"
+ * applied to one operand that can make them match, in which
+ * case this conversion is done."
+ */
+
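+ /* Illustrative examples: "vec3(1.0) == vec3(1.0)" yields a single bool
+ * (component-wise comparison is equal()/notEqual() instead), and in
+ * "1 == 2.0" the int operand is implicitly converted to float first.
+ */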
+ if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) {
+ _mesa_glsl_error(& loc, state, "`%1$s': wrong operand types: "
+ "no operation `%1$s' exists that takes a left-hand "
+ "operand of type 'void' or a right operand of type "
+ "'void'", (this->oper == ast_equal) ? "==" : "!=");
+ error_emitted = true;
+ } else if ((!apply_implicit_conversion(op[0]->type, op[1], state)
+ && !apply_implicit_conversion(op[1]->type, op[0], state))
+ || (op[0]->type != op[1]->type)) {
+ _mesa_glsl_error(& loc, state, "operands of `%s' must have the same "
+ "type", (this->oper == ast_equal) ? "==" : "!=");
+ error_emitted = true;
+ } else if ((op[0]->type->is_array() || op[1]->type->is_array()) &&
+ !state->check_version(120, 300, &loc,
+ "array comparisons forbidden")) {
+ error_emitted = true;
+ } else if ((op[0]->type->contains_opaque() ||
+ op[1]->type->contains_opaque())) {
+ _mesa_glsl_error(&loc, state, "opaque type comparisons forbidden");
+ error_emitted = true;
+ }
+
+ if (error_emitted) {
+ result = new(ctx) ir_constant(false);
+ } else {
+ result = do_comparison(ctx, operations[this->oper], op[0], op[1]);
+ assert(result->type == glsl_type::bool_type);
+ }
+ break;
+
+ case ast_bit_and:
+ case ast_bit_xor:
+ case ast_bit_or:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
+ result = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+ error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
+ break;
+
+ case ast_bit_not:
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+
+ if (!state->check_bitwise_operations_allowed(&loc)) {
+ error_emitted = true;
+ }
+
+ if (!op[0]->type->is_integer()) {
+ _mesa_glsl_error(&loc, state, "operand of `~' must be an integer");
+ error_emitted = true;
+ }
+
+ type = error_emitted ? glsl_type::error_type : op[0]->type;
+ result = new(ctx) ir_expression(ir_unop_bit_not, type, op[0], NULL);
+ break;
+
+ case ast_logic_and: {
+ exec_list rhs_instructions;
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "LHS", &error_emitted);
+ op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1,
+ "RHS", &error_emitted);
+
+ if (rhs_instructions.is_empty()) {
+ result = new(ctx) ir_expression(ir_binop_logic_and, op[0], op[1]);
+ type = result->type;
+ } else {
+ ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type,
+ "and_tmp",
+ ir_var_temporary);
+ instructions->push_tail(tmp);
+
+ ir_if *const stmt = new(ctx) ir_if(op[0]);
+ instructions->push_tail(stmt);
+
+ stmt->then_instructions.append_list(&rhs_instructions);
+ ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const then_assign =
+ new(ctx) ir_assignment(then_deref, op[1]);
+ stmt->then_instructions.push_tail(then_assign);
+
+ ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const else_assign =
+ new(ctx) ir_assignment(else_deref, new(ctx) ir_constant(false));
+ stmt->else_instructions.push_tail(else_assign);
+
+ result = new(ctx) ir_dereference_variable(tmp);
+ type = tmp->type;
+ }
+ break;
+ }
+
+ case ast_logic_or: {
+ exec_list rhs_instructions;
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "LHS", &error_emitted);
+ op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1,
+ "RHS", &error_emitted);
+
+ if (rhs_instructions.is_empty()) {
+ result = new(ctx) ir_expression(ir_binop_logic_or, op[0], op[1]);
+ type = result->type;
+ } else {
+ ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type,
+ "or_tmp",
+ ir_var_temporary);
+ instructions->push_tail(tmp);
+
+ ir_if *const stmt = new(ctx) ir_if(op[0]);
+ instructions->push_tail(stmt);
+
+ ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const then_assign =
+ new(ctx) ir_assignment(then_deref, new(ctx) ir_constant(true));
+ stmt->then_instructions.push_tail(then_assign);
+
+ stmt->else_instructions.append_list(&rhs_instructions);
+ ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const else_assign =
+ new(ctx) ir_assignment(else_deref, op[1]);
+ stmt->else_instructions.push_tail(else_assign);
+
+ result = new(ctx) ir_dereference_variable(tmp);
+ type = tmp->type;
+ }
+ break;
+ }
+
+ case ast_logic_xor:
+ /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "The logical binary operators and (&&), or ( | | ), and
+ * exclusive or (^^). They operate only on two Boolean
+ * expressions and result in a Boolean expression."
+ */
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0, "LHS",
+ &error_emitted);
+ op[1] = get_scalar_boolean_operand(instructions, state, this, 1, "RHS",
+ &error_emitted);
+
+ result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
+ op[0], op[1]);
+ break;
+
+ case ast_logic_not:
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "operand", &error_emitted);
+
+ result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
+ op[0], NULL);
+ break;
+
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_add_assign:
+ case ast_sub_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = arithmetic_result_type(op[0], op[1],
+ (this->oper == ast_mul_assign),
+ state, & loc);
+
+ ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+
+ /* GLSL 1.10 does not allow array assignment. However, we don't have to
+ * explicitly test for this because none of the binary expression
+ * operators allow array operands either.
+ */
+
+ break;
+ }
+
+ case ast_mod_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+
+ type = modulus_result_type(op[0], op[1], state, &loc);
+
+ assert(operations[this->oper] == ir_binop_mod);
+
+ ir_rvalue *temp_rhs;
+ temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_ls_assign:
+ case ast_rs_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
+ &loc);
+ ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
+ type, op[0], op[1]);
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign: {
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = this->subexpressions[1]->hir(instructions, state);
+ type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
+ ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
+ type, op[0], op[1]);
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_conditional: {
+ /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The ternary selection operator (?:). It operates on three
+ * expressions (exp1 ? exp2 : exp3). This operator evaluates the
+ * first expression, which must result in a scalar Boolean."
+ */
+ op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+ "condition", &error_emitted);
+
+ /* The ?: operator is implemented by generating an anonymous temporary
+ * followed by an if-statement. The last instruction in each branch of
+ * the if-statement assigns a value to the anonymous temporary. This
+ * temporary is the r-value of the expression.
+ */
+ exec_list then_instructions;
+ exec_list else_instructions;
+
+ op[1] = this->subexpressions[1]->hir(&then_instructions, state);
+ op[2] = this->subexpressions[2]->hir(&else_instructions, state);
+
+ /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The second and third expressions can be any type, as
+ * long their types match, or there is a conversion in
+ * Section 4.1.10 "Implicit Conversions" that can be applied
+ * to one of the expressions to make their types match. This
+ * resulting matching type is the type of the entire
+ * expression."
+ */
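+ /* Illustrative examples: in "b ? 1 : 2.5" the int operand is converted
+ * to float and the whole expression is float; "b ? 1 : vec2(0.0)" is an
+ * error because no implicit conversion can make the types match.
+ */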
+ if ((!apply_implicit_conversion(op[1]->type, op[2], state)
+ && !apply_implicit_conversion(op[2]->type, op[1], state))
+ || (op[1]->type != op[2]->type)) {
+ YYLTYPE loc = this->subexpressions[1]->get_location();
+
+ _mesa_glsl_error(& loc, state, "second and third operands of ?: "
+ "operator must have matching types");
+ error_emitted = true;
+ type = glsl_type::error_type;
+ } else {
+ type = op[1]->type;
+ }
+
+ /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "The second and third expressions must be the same type, but can
+ * be of any type other than an array."
+ */
+ if (type->is_array() &&
+ !state->check_version(120, 300, &loc,
+ "second and third operands of ?: operator "
+ "cannot be arrays")) {
+ error_emitted = true;
+ }
+
+ /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types):
+ *
+ * "Except for array indexing, structure member selection, and
+ * parentheses, opaque variables are not allowed to be operands in
+ * expressions; such use results in a compile-time error."
+ */
+ if (type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state, "opaque variables cannot be operands "
+ "of the ?: operator");
+ error_emitted = true;
+ }
+
+ ir_constant *cond_val = op[0]->constant_expression_value();
+
+ if (then_instructions.is_empty()
+ && else_instructions.is_empty()
+ && cond_val != NULL) {
+ result = cond_val->value.b[0] ? op[1] : op[2];
+ } else {
+ /* The copy to conditional_tmp reads the whole array. */
+ if (type->is_array()) {
+ mark_whole_array_access(op[1]);
+ mark_whole_array_access(op[2]);
+ }
+
+ ir_variable *const tmp =
+ new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary);
+ instructions->push_tail(tmp);
+
+ ir_if *const stmt = new(ctx) ir_if(op[0]);
+ instructions->push_tail(stmt);
+
+ then_instructions.move_nodes_to(& stmt->then_instructions);
+ ir_dereference *const then_deref =
+ new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const then_assign =
+ new(ctx) ir_assignment(then_deref, op[1]);
+ stmt->then_instructions.push_tail(then_assign);
+
+ else_instructions.move_nodes_to(& stmt->else_instructions);
+ ir_dereference *const else_deref =
+ new(ctx) ir_dereference_variable(tmp);
+ ir_assignment *const else_assign =
+ new(ctx) ir_assignment(else_deref, op[2]);
+ stmt->else_instructions.push_tail(else_assign);
+
+ result = new(ctx) ir_dereference_variable(tmp);
+ }
+ break;
+ }
+
+ case ast_pre_inc:
+ case ast_pre_dec: {
+ this->non_lvalue_description = (this->oper == ast_pre_inc)
+ ? "pre-increment operation" : "pre-decrement operation";
+
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = constant_one_for_inc_dec(ctx, op[0]->type);
+
+ type = arithmetic_result_type(op[0], op[1], false, state, & loc);
+
+ ir_rvalue *temp_rhs;
+ temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &result, needs_rvalue, false,
+ this->subexpressions[0]->get_location());
+ break;
+ }
+
+ case ast_post_inc:
+ case ast_post_dec: {
+ this->non_lvalue_description = (this->oper == ast_post_inc)
+ ? "post-increment operation" : "post-decrement operation";
+ op[0] = this->subexpressions[0]->hir(instructions, state);
+ op[1] = constant_one_for_inc_dec(ctx, op[0]->type);
+
+ error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
+
+ type = arithmetic_result_type(op[0], op[1], false, state, & loc);
+
+ ir_rvalue *temp_rhs;
+ temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+ op[0], op[1]);
+
+ /* Get a temporary of a copy of the lvalue before it's modified.
+ * This may get thrown away later.
+ */
+ result = get_lvalue_copy(instructions, op[0]->clone(ctx, NULL));
+
+ ir_rvalue *junk_rvalue;
+ error_emitted =
+ do_assignment(instructions, state,
+ this->subexpressions[0]->non_lvalue_description,
+ op[0]->clone(ctx, NULL), temp_rhs,
+ &junk_rvalue, false, false,
+ this->subexpressions[0]->get_location());
+
+ break;
+ }
+
+ case ast_field_selection:
+ result = _mesa_ast_field_selection_to_hir(this, instructions, state);
+ break;
+
+ case ast_array_index: {
+ YYLTYPE index_loc = subexpressions[1]->get_location();
+
+ op[0] = subexpressions[0]->hir(instructions, state);
+ op[1] = subexpressions[1]->hir(instructions, state);
+
+ result = _mesa_ast_array_index_to_hir(ctx, state, op[0], op[1],
+ loc, index_loc);
+
+ if (result->type->is_error())
+ error_emitted = true;
+
+ break;
+ }
+
+ case ast_unsized_array_dim:
+ assert(!"ast_unsized_array_dim: Should never get here.");
+ break;
+
+ case ast_function_call:
+ /* Should *NEVER* get here. ast_function_call should always be handled
+ * by ast_function_expression::hir.
+ */
+ assert(0);
+ break;
+
+ case ast_identifier: {
+ /* ast_identifier can appear in several places in a full abstract syntax
+ * tree. This particular use must be at a location specified in the grammar
+ * as 'variable_identifier'.
+ */
+ ir_variable *var =
+ state->symbols->get_variable(this->primary_expression.identifier);
+
+ if (var != NULL) {
+ var->data.used = true;
+ result = new(ctx) ir_dereference_variable(var);
+ } else {
+ _mesa_glsl_error(& loc, state, "`%s' undeclared",
+ this->primary_expression.identifier);
+
+ result = ir_rvalue::error_value(ctx);
+ error_emitted = true;
+ }
+ break;
+ }
+
+ case ast_int_constant:
+ result = new(ctx) ir_constant(this->primary_expression.int_constant);
+ break;
+
+ case ast_uint_constant:
+ result = new(ctx) ir_constant(this->primary_expression.uint_constant);
+ break;
+
+ case ast_float_constant:
+ result = new(ctx) ir_constant(this->primary_expression.float_constant);
+ break;
+
+ case ast_bool_constant:
+ result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant));
+ break;
+
+ case ast_double_constant:
+ result = new(ctx) ir_constant(this->primary_expression.double_constant);
+ break;
+
+ case ast_sequence: {
+ /* It should not be possible to generate a sequence in the AST without
+ * any expressions in it.
+ */
+ assert(!this->expressions.is_empty());
+
+ /* The r-value of a sequence is the last expression in the sequence. If
+ * the other expressions in the sequence do not have side-effects (and
+ * therefore add instructions to the instruction list), they get dropped
+ * on the floor.
+ */
+ exec_node *previous_tail_pred = NULL;
+ YYLTYPE previous_operand_loc = loc;
+
+ foreach_list_typed (ast_node, ast, link, &this->expressions) {
+ /* If one of the operands of comma operator does not generate any
+ * code, we want to emit a warning. At each pass through the loop
+ * previous_tail_pred will point to the last instruction in the
+ * stream *before* processing the previous operand. Naturally,
+ * instructions->tail_pred will point to the last instruction in the
+ * stream *after* processing the previous operand. If the two
+ * pointers match, then the previous operand had no effect.
+ *
+ * The warning behavior here differs slightly from GCC. GCC will
+ * only emit a warning if none of the left-hand operands have an
+ * effect. However, it will emit a warning for each. I believe that
+ * there are some cases in C (especially with GCC extensions) where
+ * it is useful to have an intermediate step in a sequence have no
+ * effect, but I don't think these cases exist in GLSL. Either way,
+ * it would be a giant hassle to replicate that behavior.
+ */
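+ /* For example (illustrative), in "(3, i = 1)" the constant 3 emits no
+ * instructions, so the warning below fires for it.
+ */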
+ if (previous_tail_pred == instructions->tail_pred) {
+ _mesa_glsl_warning(&previous_operand_loc, state,
+ "left-hand operand of comma expression has "
+ "no effect");
+ }
+
+ /* tail_pred is directly accessed instead of using the get_tail()
+ * method for performance reasons. get_tail() has extra code to
+ * return NULL when the list is empty. We don't care about that
+ * here, so using tail_pred directly is fine.
+ */
+ previous_tail_pred = instructions->tail_pred;
+ previous_operand_loc = ast->get_location();
+
+ result = ast->hir(instructions, state);
+ }
+
+ /* Any errors should have already been emitted in the loop above.
+ */
+ error_emitted = true;
+ break;
+ }
+ }
+ type = NULL; /* use result->type, not type. */
+ assert(result != NULL || !needs_rvalue);
+
+ if (result && result->type->is_error() && !error_emitted)
+ _mesa_glsl_error(& loc, state, "type mismatch");
+
+ return result;
+ }
+
+ bool
+ ast_expression::has_sequence_subexpression() const
+ {
+ switch (this->oper) {
+ case ast_plus:
+ case ast_neg:
+ case ast_bit_not:
+ case ast_logic_not:
+ case ast_pre_inc:
+ case ast_pre_dec:
+ case ast_post_inc:
+ case ast_post_dec:
+ return this->subexpressions[0]->has_sequence_subexpression();
+
+ case ast_assign:
+ case ast_add:
+ case ast_sub:
+ case ast_mul:
+ case ast_div:
+ case ast_mod:
+ case ast_lshift:
+ case ast_rshift:
+ case ast_less:
+ case ast_greater:
+ case ast_lequal:
+ case ast_gequal:
+ case ast_nequal:
+ case ast_equal:
+ case ast_bit_and:
+ case ast_bit_xor:
+ case ast_bit_or:
+ case ast_logic_and:
+ case ast_logic_or:
+ case ast_logic_xor:
+ case ast_array_index:
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_add_assign:
+ case ast_sub_assign:
+ case ast_mod_assign:
+ case ast_ls_assign:
+ case ast_rs_assign:
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign:
+ return this->subexpressions[0]->has_sequence_subexpression() ||
+ this->subexpressions[1]->has_sequence_subexpression();
+
+ case ast_conditional:
+ return this->subexpressions[0]->has_sequence_subexpression() ||
+ this->subexpressions[1]->has_sequence_subexpression() ||
+ this->subexpressions[2]->has_sequence_subexpression();
+
+ case ast_sequence:
+ return true;
+
+ case ast_field_selection:
+ case ast_identifier:
+ case ast_int_constant:
+ case ast_uint_constant:
+ case ast_float_constant:
+ case ast_bool_constant:
+ case ast_double_constant:
+ return false;
+
+ case ast_aggregate:
+ unreachable("ast_aggregate: Should never get here.");
+
+ case ast_function_call:
+ unreachable("should be handled by ast_function_expression::hir");
+
+ case ast_unsized_array_dim:
+ unreachable("ast_unsized_array_dim: Should never get here.");
+ }
+
+ return false;
+ }
+
+ ir_rvalue *
+ ast_expression_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ /* It is possible to have expression statements that don't have an
+ * expression. This is the solitary semicolon:
+ *
+ * for (i = 0; i < 5; i++)
+ * ;
+ *
+ * In this case the expression will be NULL. Test for NULL and don't do
+ * anything in that case.
+ */
+ if (expression != NULL)
+ expression->hir_no_rvalue(instructions, state);
+
+ /* Statements do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_compound_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ if (new_scope)
+ state->symbols->push_scope();
+
+ foreach_list_typed (ast_node, ast, link, &this->statements)
+ ast->hir(instructions, state);
+
+ if (new_scope)
+ state->symbols->pop_scope();
+
+ /* Compound statements do not have r-values.
+ */
+ return NULL;
+ }
+
+ /**
+ * Evaluate the given exec_node (which should be an ast_node representing
+ * a single array dimension) and return its integer value.
+ */
+ static unsigned
+ process_array_size(exec_node *node,
+ struct _mesa_glsl_parse_state *state)
+ {
+ exec_list dummy_instructions;
+
+ ast_node *array_size = exec_node_data(ast_node, node, link);
+
+ /**
+ * Dimensions other than the outermost dimension can be unsized if they
+ * are immediately sized by a constructor or initializer.
+ */
+ if (((ast_expression*)array_size)->oper == ast_unsized_array_dim)
+ return 0;
+
+ ir_rvalue *const ir = array_size->hir(& dummy_instructions, state);
+ YYLTYPE loc = array_size->get_location();
+
+ if (ir == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "array size could not be resolved");
+ return 0;
+ }
+
+ if (!ir->type->is_integer()) {
+ _mesa_glsl_error(& loc, state,
+ "array size must be integer type");
+ return 0;
+ }
+
+ if (!ir->type->is_scalar()) {
+ _mesa_glsl_error(& loc, state,
+ "array size must be scalar type");
+ return 0;
+ }
+
+ ir_constant *const size = ir->constant_expression_value();
+ if (size == NULL || array_size->has_sequence_subexpression()) {
+ _mesa_glsl_error(& loc, state, "array size must be a "
+ "constant valued expression");
+ return 0;
+ }
+
+ if (size->value.i[0] <= 0) {
+ _mesa_glsl_error(& loc, state, "array size must be > 0");
+ return 0;
+ }
+
+ assert(size->type == ir->type);
+
+ /* If the array size is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the array size isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
+
+ return size->value.u[0];
+ }
+
+ static const glsl_type *
+ process_array_type(YYLTYPE *loc, const glsl_type *base,
+ ast_array_specifier *array_specifier,
+ struct _mesa_glsl_parse_state *state)
+ {
+ const glsl_type *array_type = base;
+
+ if (array_specifier != NULL) {
+ if (base->is_array()) {
+
+ /* From page 19 (page 25) of the GLSL 1.20 spec:
+ *
+ * "Only one-dimensional arrays may be declared."
+ */
+ if (!state->check_arrays_of_arrays_allowed(loc)) {
+ return glsl_type::error_type;
+ }
+ }
+
+ for (exec_node *node = array_specifier->array_dimensions.tail_pred;
+ !node->is_head_sentinel(); node = node->prev) {
+ unsigned array_size = process_array_size(node, state);
+ array_type = glsl_type::get_array_instance(array_type, array_size);
+ }
+ }
+
+ return array_type;
+ }
+
+ static bool
+ precision_qualifier_allowed(const glsl_type *type)
+ {
+ /* Precision qualifiers apply to floating point, integer and opaque
+ * types.
+ *
+ * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says:
+ * "Any floating point or any integer declaration can have the type
+ * preceded by one of these precision qualifiers [...] Literal
+ * constants do not have precision qualifiers. Neither do Boolean
+ * variables."
+ *
+ * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30
+ * spec also says:
+ *
+ * "Precision qualifiers are added for code portability with OpenGL
+ * ES, not for functionality. They have the same syntax as in OpenGL
+ * ES."
+ *
+ * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says:
+ *
+ * "uniform lowp sampler2D sampler;
+ * highp vec2 coord;
+ * ...
+ * lowp vec4 col = texture2D (sampler, coord);
+ * // texture2D returns lowp"
+ *
+ * From this, we infer that GLSL 1.30 (and later) should allow precision
+ * qualifiers on sampler types just like float and integer types.
+ */
+ return (type->is_float()
+ || type->is_integer()
+ || type->contains_opaque())
+ && !type->without_array()->is_record();
+ }
+
+ const glsl_type *
+ ast_type_specifier::glsl_type(const char **name,
+ struct _mesa_glsl_parse_state *state) const
+ {
+ const struct glsl_type *type;
+
+ type = state->symbols->get_type(this->type_name);
+ *name = this->type_name;
+
+ YYLTYPE loc = this->get_location();
+ type = process_array_type(&loc, type, this->array_specifier, state);
+
+ return type;
+ }
+
+ /**
+ * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers:
+ *
+ * "The precision statement
+ *
+ * precision precision-qualifier type;
+ *
+ * can be used to establish a default precision qualifier. The type field can
+ * be either int or float or any of the sampler types, (...) If type is float,
+ * the directive applies to non-precision-qualified floating point type
+ * (scalar, vector, and matrix) declarations. If type is int, the directive
+ * applies to all non-precision-qualified integer type (scalar, vector, signed,
+ * and unsigned) declarations."
+ *
+ * We use the symbol table to keep the values of the default precisions for
+ * each 'type' in each scope and we use the 'type' string from the precision
+ * statement as key in the symbol table. When we want to retrieve the default
+ * precision associated with a given glsl_type we need to know the type string
+ * associated with it. This is what this function returns.
+ */
+ static const char *
+ get_type_name_for_precision_qualifier(const glsl_type *type)
+ {
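+ /* For example (illustrative): a vec3 maps to the key "float", an ivec2
+ * to "int", and a sampler2D to "sampler2D", matching the keys used by
+ * default precision statements such as "precision mediump float;".
+ */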
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ return "float";
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ return "int";
+ case GLSL_TYPE_ATOMIC_UINT:
+ return "atomic_uint";
+ case GLSL_TYPE_IMAGE:
+ /* fallthrough */
+ case GLSL_TYPE_SAMPLER: {
+ const unsigned type_idx =
+ type->sampler_array + 2 * type->sampler_shadow;
+ const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 0 : 4;
+ assert(type_idx < 4);
+ switch (type->sampler_type) {
+ case GLSL_TYPE_FLOAT:
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "sampler1D", "sampler1DArray",
+ "sampler1DShadow", "sampler1DArrayShadow"
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_2D: {
+ static const char *const names[8] = {
+ "sampler2D", "sampler2DArray",
+ "sampler2DShadow", "sampler2DArrayShadow",
+ "image2D", "image2DArray", NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_3D: {
+ static const char *const names[8] = {
+ "sampler3D", NULL, NULL, NULL,
+ "image3D", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_CUBE: {
+ static const char *const names[8] = {
+ "samplerCube", "samplerCubeArray",
+ "samplerCubeShadow", "samplerCubeArrayShadow",
+ "imageCube", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_MS: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "sampler2DMS", "sampler2DMSArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_RECT: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "samplerRect", NULL, "samplerRectShadow", NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_BUF: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "samplerBuffer", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_EXTERNAL: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "samplerExternalOES", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ default:
+ unreachable("Unsupported sampler/image dimensionality");
+ } /* sampler/image float dimensionality */
+ break;
+ case GLSL_TYPE_INT:
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isampler1D", "isampler1DArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_2D: {
+ static const char *const names[8] = {
+ "isampler2D", "isampler2DArray", NULL, NULL,
+ "iimage2D", "iimage2DArray", NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_3D: {
+ static const char *const names[8] = {
+ "isampler3D", NULL, NULL, NULL,
+ "iimage3D", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_CUBE: {
+ static const char *const names[8] = {
+ "isamplerCube", "isamplerCubeArray", NULL, NULL,
+ "iimageCube", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_MS: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isampler2DMS", "isampler2DMSArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_RECT: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isamplerRect", NULL, "isamplerRectShadow", NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_BUF: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "isamplerBuffer", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ default:
+ unreachable("Unsupported isampler/iimage dimensionality");
+ } /* sampler/image int dimensionality */
+ break;
+ case GLSL_TYPE_UINT:
+ switch (type->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usampler1D", "usampler1DArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_2D: {
+ static const char *const names[8] = {
+ "usampler2D", "usampler2DArray", NULL, NULL,
+ "uimage2D", "uimage2DArray", NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_3D: {
+ static const char *const names[8] = {
+ "usampler3D", NULL, NULL, NULL,
+ "uimage3D", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_CUBE: {
+ static const char *const names[8] = {
+ "usamplerCube", "usamplerCubeArray", NULL, NULL,
+ "uimageCube", NULL, NULL, NULL
+ };
+ return names[offset + type_idx];
+ }
+ case GLSL_SAMPLER_DIM_MS: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usampler2DMS", "usampler2DMSArray", NULL, NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_RECT: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usamplerRect", NULL, "usamplerRectShadow", NULL
+ };
+ return names[type_idx];
+ }
+ case GLSL_SAMPLER_DIM_BUF: {
+ assert(type->base_type == GLSL_TYPE_SAMPLER);
+ static const char *const names[4] = {
+ "usamplerBuffer", NULL, NULL, NULL
+ };
+ return names[type_idx];
+ }
+ default:
+ unreachable("Unsupported usampler/uimage dimensionality");
+ } /* sampler/image uint dimensionality */
+ break;
+ default:
+ unreachable("Unsupported sampler/image type");
+ } /* sampler/image type */
+ break;
+ } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */
+ break;
+ default:
+ unreachable("Unsupported type");
+ } /* base type */
+ }
+
+ static unsigned
+ select_gles_precision(unsigned qual_precision,
+ const glsl_type *type,
+ struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+ {
+ /* Precision qualifiers do not have any meaning in Desktop GLSL.
+ * In GLES we take the precision from the type qualifier if present,
+ * otherwise, if the type of the variable allows precision qualifiers at
+ * all, we look for the default precision qualifier for that type in the
+ * current scope.
+ */
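+ /* For example (illustrative, GLSL ES): "uniform float f;" in a fragment
+ * shader with no "precision ... float;" statement in scope is an error,
+ * while "uniform mediump float f;" takes mediump from the qualifier.
+ */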
+ assert(state->es_shader);
+
+ unsigned precision = GLSL_PRECISION_NONE;
+ if (qual_precision) {
+ precision = qual_precision;
+ } else if (precision_qualifier_allowed(type)) {
+ const char *type_name =
+ get_type_name_for_precision_qualifier(type->without_array());
+ assert(type_name != NULL);
+
+ precision =
+ state->symbols->get_default_precision_qualifier(type_name);
+ if (precision == ast_precision_none) {
+ _mesa_glsl_error(loc, state,
+ "No precision specified in this scope for type `%s'",
+ type->name);
+ }
+ }
+ return precision;
+ }
+
+ const glsl_type *
+ ast_fully_specified_type::glsl_type(const char **name,
+ struct _mesa_glsl_parse_state *state) const
+ {
+ return this->specifier->glsl_type(name, state);
+ }
+
+ /**
+ * Determine whether a toplevel variable declaration declares a varying. This
+ * function operates by examining the variable's mode and the shader target,
+ * so it correctly identifies linkage variables regardless of whether they are
+ * declared using the deprecated "varying" syntax or the new "in/out" syntax.
+ *
+ * Passing a non-toplevel variable declaration (e.g. a function parameter) to
+ * this function will produce undefined results.
+ */
+ static bool
+ is_varying_var(ir_variable *var, gl_shader_stage target)
+ {
+ switch (target) {
+ case MESA_SHADER_VERTEX:
+ return var->data.mode == ir_var_shader_out;
+ case MESA_SHADER_FRAGMENT:
+ return var->data.mode == ir_var_shader_in;
+ default:
+ return var->data.mode == ir_var_shader_out || var->data.mode == ir_var_shader_in;
+ }
+ }
+
+
+ /**
+ * Matrix layout qualifiers are only allowed on certain types
+ */
+ static void
+ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ const glsl_type *type,
+ ir_variable *var)
+ {
+ if (var && !var->is_in_buffer_block()) {
+ /* Layout qualifiers may only apply to interface blocks and fields in
+ * them.
+ */
+ _mesa_glsl_error(loc, state,
+ "uniform block layout qualifiers row_major and "
+ "column_major may not be applied to variables "
+ "outside of uniform blocks");
+ } else if (!type->without_array()->is_matrix()) {
+ /* The OpenGL ES 3.0 conformance tests did not originally allow
+ * matrix layout qualifiers on non-matrices. However, the OpenGL
+ * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
+ * amended to specifically allow these layouts on all types. Emit
+ * a warning so that people know their code may not be portable.
+ */
+ _mesa_glsl_warning(loc, state,
+ "uniform block layout qualifiers row_major and "
+ "column_major applied to non-matrix types may "
+ "be rejected by older compilers");
+ }
+ }
+
+ static bool
+ process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ const char *qual_identifier,
+ ast_expression *const_expression,
+ unsigned *value)
+ {
+ exec_list dummy_instructions;
+
+ if (const_expression == NULL) {
+ *value = 0;
+ return true;
+ }
+
+ ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+ ir_constant *const const_int = ir->constant_expression_value();
+ if (const_int == NULL || !const_int->type->is_integer()) {
+ _mesa_glsl_error(loc, state, "%s must be an integral constant "
+ "expression", qual_indentifier);
+ return false;
+ }
+
+ if (const_int->value.i[0] < 0) {
+ _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+ qual_identifier, const_int->value.i[0]);
+ return false;
+ }
+
+ /* If the location is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the location isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
+
+ *value = const_int->value.u[0];
+ return true;
+ }
+
+ static bool
+ validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state,
+ unsigned stream)
+ {
+ if (stream >= state->ctx->Const.MaxVertexStreams) {
+ _mesa_glsl_error(loc, state,
+ "invalid stream specified %d is larger than "
+ "MAX_VERTEX_STREAMS - 1 (%d).",
+ stream, state->ctx->Const.MaxVertexStreams - 1);
+ return false;
+ }
+
+ return true;
+ }
+
+ static void
+ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ ir_variable *var,
+ const glsl_type *type,
+ const ast_type_qualifier *qual)
+ {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
+ _mesa_glsl_error(loc, state,
+ "the \"binding\" qualifier only applies to uniforms and "
+ "shader storage buffer objects");
+ return;
+ }
+
+ unsigned qual_binding;
+ if (!process_qualifier_constant(state, loc, "binding", qual->binding,
+ &qual_binding)) {
+ return;
+ }
+
+ const struct gl_context *const ctx = state->ctx;
+ unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
+ unsigned max_index = qual_binding + elements - 1;
+ const glsl_type *base_type = type->without_array();
+
+ if (base_type->is_interface()) {
+ /* UBOs. From page 60 of the GLSL 4.20 specification:
+ * "If the binding point for any uniform block instance is less than zero,
+ * or greater than or equal to the implementation-dependent maximum
+ * number of uniform buffer bindings, a compilation error will occur.
+ * When the binding identifier is used with a uniform block instanced as
+ * an array of size N, all elements of the array from binding through
+ * binding + N – 1 must be within this range."
+ *
+ * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS.
+ */
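+ /* e.g. (illustrative) "layout(binding = 2) uniform Block { ... } b[4];"
+ * occupies bindings 2..5, so max_index below would be 5.
+ */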
+ if (qual->flags.q.uniform &&
+ max_index >= ctx->Const.MaxUniformBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds "
+ "the maximum number of UBO binding points (%d)",
+ qual_binding, elements,
+ ctx->Const.MaxUniformBufferBindings);
+ return;
+ }
+
+ /* SSBOs. From page 67 of the GLSL 4.30 specification:
+ * "If the binding point for any uniform or shader storage block instance
+ * is less than zero, or greater than or equal to the
+ * implementation-dependent maximum number of uniform buffer bindings, a
+ * compile-time error will occur. When the binding identifier is used
+ * with a uniform or shader storage block instanced as an array of size
+ * N, all elements of the array from binding through binding + N – 1 must
+ * be within this range."
+ */
+ if (qual->flags.q.buffer &&
+ max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds "
+ "the maximum number of SSBO binding points (%d)",
+ qual_binding, elements,
+ ctx->Const.MaxShaderStorageBufferBindings);
+ return;
+ }
+ } else if (base_type->is_sampler()) {
+ /* Samplers. From page 63 of the GLSL 4.20 specification:
+ * "If the binding is less than zero, or greater than or equal to the
+ * implementation-dependent maximum supported number of units, a
+ * compilation error will occur. When the binding identifier is used
+ * with an array of size N, all elements of the array from binding
+ * through binding + N - 1 must be within this range."
+ */
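+ /* Illustrative (non-normative) example:
+ *
+ *    layout(binding = 3) uniform sampler2D tex[4];
+ *
+ * uses texture image units 3 through 6, so unit 6 must be below the
+ * combined texture image unit limit.
+ */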
+ unsigned limit = ctx->Const.MaxCombinedTextureImageUnits;
+
+ if (max_index >= limit) {
+ _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers "
+ "exceeds the maximum number of texture image units "
+ "(%u)", qual_binding, elements, limit);
+
+ return;
+ }
+ } else if (base_type->contains_atomic()) {
+ assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
+ if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the "
+ " maximum number of atomic counter buffer bindings"
+ "(%u)", qual_binding,
+ ctx->Const.MaxAtomicBufferBindings);
+
+ return;
+ }
+ } else if ((state->is_version(420, 310) ||
+ state->ARB_shading_language_420pack_enable) &&
+ base_type->is_image()) {
+ assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
+ if (max_index >= ctx->Const.MaxImageUnits) {
+ _mesa_glsl_error(loc, state, "Image binding %d exceeds the "
+ " maximum number of image units (%d)", max_index,
+ ctx->Const.MaxImageUnits);
+ return;
+ }
+
+ } else {
+ _mesa_glsl_error(loc, state,
+ "the \"binding\" qualifier only applies to uniform "
+ "blocks, opaque variables, or arrays thereof");
+ return;
+ }
+
+ var->data.explicit_binding = true;
+ var->data.binding = qual_binding;
+
+ return;
+ }
+
+
+ static glsl_interp_qualifier
+ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
+ ir_variable_mode mode,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ glsl_interp_qualifier interpolation;
+ if (qual->flags.q.flat)
+ interpolation = INTERP_QUALIFIER_FLAT;
+ else if (qual->flags.q.noperspective)
+ interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ else if (qual->flags.q.smooth)
+ interpolation = INTERP_QUALIFIER_SMOOTH;
+ else
+ interpolation = INTERP_QUALIFIER_NONE;
+
+ if (interpolation != INTERP_QUALIFIER_NONE) {
+ if (mode != ir_var_shader_in && mode != ir_var_shader_out) {
+ _mesa_glsl_error(loc, state,
+ "interpolation qualifier `%s' can only be applied to "
+ "shader inputs or outputs.",
+ interpolation_string(interpolation));
+
+ }
+
+ if ((state->stage == MESA_SHADER_VERTEX && mode == ir_var_shader_in) ||
+ (state->stage == MESA_SHADER_FRAGMENT && mode == ir_var_shader_out)) {
+ _mesa_glsl_error(loc, state,
+ "interpolation qualifier `%s' cannot be applied to "
+ "vertex shader inputs or fragment shader outputs",
+ interpolation_string(interpolation));
+ }
+ }
+
+ return interpolation;
+ }
+
+
+ static void
+ apply_explicit_location(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ bool fail = false;
+
+ unsigned qual_location;
+ if (!process_qualifier_constant(state, loc, "location", qual->location,
+ &qual_location)) {
+ return;
+ }
+
+ /* Checks for GL_ARB_explicit_uniform_location. */
+ if (qual->flags.q.uniform) {
+ if (!state->check_explicit_uniform_location_allowed(loc, var))
+ return;
+
+ const struct gl_context *const ctx = state->ctx;
+ unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
+
+ if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+ _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
+ ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
+ ctx->Const.MaxUserAssignableUniformLocations);
+ return;
+ }
+
+ var->data.explicit_location = true;
+ var->data.location = qual_location;
+ return;
+ }
+
+ /* Between GL_ARB_explicit_attrib_location and
+ * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
+ * stage can be assigned explicit locations. The checking here associates
+ * the correct extension with the correct stage's input / output:
+ *
+ * input output
+ * ----- ------
+ * vertex explicit_loc sso
+ * tess control sso sso
+ * tess eval sso sso
+ * geometry sso sso
+ * fragment sso explicit_loc
+ */
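+ /* Illustrative (non-normative) examples of the cases handled below:
+ *
+ *    layout(location = 1) in vec4 color;    // VS input: needs explicit_attrib_location
+ *    layout(location = 3) out vec4 result;  // FS output: needs explicit_attrib_location
+ *    layout(location = 2) out vec4 v;       // VS output: needs separate_shader_objects
+ */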
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == ir_var_shader_in) {
+ if (!state->check_explicit_attrib_location_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ if (var->data.mode == ir_var_shader_out) {
+ if (!state->check_separate_shader_objects_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ fail = true;
+ break;
+
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) {
+ if (!state->check_separate_shader_objects_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ fail = true;
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == ir_var_shader_in) {
+ if (!state->check_separate_shader_objects_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ if (var->data.mode == ir_var_shader_out) {
+ if (!state->check_explicit_attrib_location_allowed(loc, var))
+ return;
+
+ break;
+ }
+
+ fail = true;
+ break;
+
+ case MESA_SHADER_COMPUTE:
+ _mesa_glsl_error(loc, state,
+ "compute shader variables cannot be given "
+ "explicit locations");
+ return;
+ };
+
+ if (fail) {
+ _mesa_glsl_error(loc, state,
+ "%s cannot be given an explicit location in %s shader",
+ mode_string(var),
+ _mesa_shader_stage_to_string(state->stage));
+ } else {
+ var->data.explicit_location = true;
+
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ var->data.location = (var->data.mode == ir_var_shader_in)
+ ? (qual_location + VERT_ATTRIB_GENERIC0)
+ : (qual_location + VARYING_SLOT_VAR0);
+ break;
+
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if (var->data.patch)
+ var->data.location = qual_location + VARYING_SLOT_PATCH0;
+ else
+ var->data.location = qual_location + VARYING_SLOT_VAR0;
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ var->data.location = (var->data.mode == ir_var_shader_out)
+ ? (qual_location + FRAG_RESULT_DATA0)
+ : (qual_location + VARYING_SLOT_VAR0);
+ break;
+ case MESA_SHADER_COMPUTE:
+ assert(!"Unexpected shader type");
+ break;
+ }
+
+ /* Check if index was set for the uniform instead of the function */
+ if (qual->flags.q.explicit_index && qual->flags.q.subroutine) {
+ _mesa_glsl_error(loc, state, "an index qualifier can only be "
+ "used with subroutine functions");
+ return;
+ }
+
+ unsigned qual_index;
+ if (qual->flags.q.explicit_index &&
+ process_qualifier_constant(state, loc, "index", qual->index,
+ &qual_index)) {
+ /* From the GLSL 4.30 specification, section 4.4.2 (Output
+ * Layout Qualifiers):
+ *
+ * "It is also a compile-time error if a fragment shader
+ * sets a layout index to less than 0 or greater than 1."
+ *
+ * Older specifications don't mandate a behavior; we take
+ * this as a clarification and always generate the error.
+ */
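+ /* Non-normative example, a dual-source blending output:
+ *
+ *    layout(location = 0, index = 1) out vec4 second_src;
+ *
+ * Only index 0 or 1 is allowed.
+ */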
+ if (qual_index > 1) {
+ _mesa_glsl_error(loc, state,
+ "explicit index may only be 0 or 1");
+ } else {
+ var->data.explicit_index = true;
+ var->data.index = qual_index;
+ }
+ }
+ }
+ }
+
+ static void
+ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ const glsl_type *base_type = var->type->without_array();
+
+ if (base_type->is_image()) {
+ if (var->data.mode != ir_var_uniform &&
+ var->data.mode != ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "image variables may only be declared as "
+ "function parameters or uniform-qualified "
+ "global variables");
+ }
+
+ var->data.image_read_only |= qual->flags.q.read_only;
+ var->data.image_write_only |= qual->flags.q.write_only;
+ var->data.image_coherent |= qual->flags.q.coherent;
+ var->data.image_volatile |= qual->flags.q._volatile;
+ var->data.image_restrict |= qual->flags.q.restrict_flag;
+ var->data.read_only = true;
+
+ if (qual->flags.q.explicit_image_format) {
+ if (var->data.mode == ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "format qualifiers cannot be "
+ "used on image function parameters");
+ }
+
+ if (qual->image_base_type != base_type->sampler_type) {
+ _mesa_glsl_error(loc, state, "format qualifier doesn't match the "
+ "base data type of the image");
+ }
+
+ var->data.image_format = qual->image_format;
+ } else {
+ if (var->data.mode == ir_var_uniform) {
+ if (state->es_shader) {
+ _mesa_glsl_error(loc, state, "all image uniforms "
+ "must have a format layout qualifier");
+
+ } else if (!qual->flags.q.write_only) {
+ _mesa_glsl_error(loc, state, "image uniforms not qualified with "
+ "`writeonly' must have a format layout "
+ "qualifier");
+ }
+ }
+
+ var->data.image_format = GL_NONE;
+ }
+
+ /* From page 70 of the GLSL ES 3.1 specification:
+ *
+ * "Except for image variables qualified with the format qualifiers
+ * r32f, r32i, and r32ui, image variables must specify either memory
+ * qualifier readonly or the memory qualifier writeonly."
+ */
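+ /* Illustrative (non-normative) GLSL ES 3.1 examples:
+ *
+ *    layout(r32f) uniform highp image2D a;            // ok without a memory qualifier
+ *    layout(rgba8) readonly uniform highp image2D b;  // ok, qualified readonly
+ *    layout(rgba8) uniform highp image2D c;           // error: needs readonly or writeonly
+ */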
+ if (state->es_shader &&
+ var->data.image_format != GL_R32F &&
+ var->data.image_format != GL_R32I &&
+ var->data.image_format != GL_R32UI &&
+ !var->data.image_read_only &&
+ !var->data.image_write_only) {
+ _mesa_glsl_error(loc, state, "image variables of format other than "
+ "r32f, r32i or r32ui must be qualified `readonly' or "
+ "`writeonly'");
+ }
+
+ } else if (qual->flags.q.read_only ||
+ qual->flags.q.write_only ||
+ qual->flags.q.coherent ||
+ qual->flags.q._volatile ||
+ qual->flags.q.restrict_flag ||
+ qual->flags.q.explicit_image_format) {
+ _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to "
+ "images");
+ }
+ }
+
+ static inline const char*
+ get_layout_qualifier_string(bool origin_upper_left, bool pixel_center_integer)
+ {
+ if (origin_upper_left && pixel_center_integer)
+ return "origin_upper_left, pixel_center_integer";
+ else if (origin_upper_left)
+ return "origin_upper_left";
+ else if (pixel_center_integer)
+ return "pixel_center_integer";
+ else
+ return " ";
+ }
+
+ static inline bool
+ is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state,
+ const struct ast_type_qualifier *qual)
+ {
+ /* If gl_FragCoord was previously declared, and the qualifiers were
+ * different in any way, return true.
+ */
+ if (state->fs_redeclares_gl_fragcoord) {
+ return (state->fs_pixel_center_integer != qual->flags.q.pixel_center_integer
+ || state->fs_origin_upper_left != qual->flags.q.origin_upper_left);
+ }
+
+ return false;
+ }
+
+ static inline void
+ validate_array_dimensions(const glsl_type *t,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc) {
+ if (t->is_array()) {
+ t = t->fields.array;
+ while (t->is_array()) {
+ if (t->is_unsized_array()) {
+ _mesa_glsl_error(loc, state,
+ "only the outermost array dimension can "
+ "be unsized");
+ break;
+ }
+ t = t->fields.array;
+ }
+ }
+ }
+
+ static void
+ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+ {
+ if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
+
+ /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
+ *
+ * "Within any shader, the first redeclarations of gl_FragCoord
+ * must appear before any use of gl_FragCoord."
+ *
+ * Generate a compiler error if the above condition is not met by the
+ * fragment shader.
+ */
+ ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
+ if (earlier != NULL &&
+ earlier->data.used &&
+ !state->fs_redeclares_gl_fragcoord) {
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord used before its first redeclaration "
+ "in fragment shader");
+ }
+
+ /* Make sure all gl_FragCoord redeclarations specify the same layout
+ * qualifiers.
+ */
+ if (is_conflicting_fragcoord_redeclaration(state, qual)) {
+ const char *const qual_string =
+ get_layout_qualifier_string(qual->flags.q.origin_upper_left,
+ qual->flags.q.pixel_center_integer);
+
+ const char *const state_string =
+ get_layout_qualifier_string(state->fs_origin_upper_left,
+ state->fs_pixel_center_integer);
+
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord redeclared with different layout "
+ "qualifiers (%s) and (%s) ",
+ state_string,
+ qual_string);
+ }
+ state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
+ state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
+ !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord =
+ state->fs_origin_upper_left ||
+ state->fs_pixel_center_integer ||
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
+ }
+
+ var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
+ var->data.origin_upper_left = qual->flags.q.origin_upper_left;
+ if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
+ && (strcmp(var->name, "gl_FragCoord") != 0)) {
+ const char *const qual_string = (qual->flags.q.origin_upper_left)
+ ? "origin_upper_left" : "pixel_center_integer";
+
+ _mesa_glsl_error(loc, state,
+ "layout qualifier `%s' can only be applied to "
+ "fragment shader input `gl_FragCoord'",
+ qual_string);
+ }
+
+ if (qual->flags.q.explicit_location) {
+ apply_explicit_location(qual, var, state, loc);
+ } else if (qual->flags.q.explicit_index) {
+ if (!qual->flags.q.subroutine_def)
+ _mesa_glsl_error(loc, state,
+ "explicit index requires explicit location");
+ }
+
+ if (qual->flags.q.explicit_binding) {
+ apply_explicit_binding(state, loc, var, var->type, qual);
+ }
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ qual->flags.q.out && qual->flags.q.stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, loc, "stream", qual->stream,
+ &qual_stream) &&
+ validate_stream_qualifier(loc, state, qual_stream)) {
+ var->data.stream = qual_stream;
+ }
+ }
+
+ if (var->type->contains_atomic()) {
+ if (var->data.mode == ir_var_uniform) {
+ if (var->data.explicit_binding) {
+ unsigned *offset =
+ &state->atomic_counter_offsets[var->data.binding];
+
+ if (*offset % ATOMIC_COUNTER_SIZE)
+ _mesa_glsl_error(loc, state,
+ "misaligned atomic counter offset");
+
+ var->data.offset = *offset;
+ *offset += var->type->atomic_size();
+
+ } else {
+ _mesa_glsl_error(loc, state,
+ "atomic counters require explicit binding point");
+ }
+ } else if (var->data.mode != ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
+ "function parameters or uniform-qualified "
+ "global variables");
+ }
+ }
+
+ /* Is the 'layout' keyword used with parameters that allow relaxed checking?
+ * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
+ * implementations (only Mesa?) of GL_ARB_explicit_attrib_location_enable
+ * allowed the layout qualifier to be used with 'varying' and 'attribute'.
+ * These extensions and all following extensions that add the 'layout'
+ * keyword have been modified to require the use of 'in' or 'out'.
+ *
+ * The following extensions do not allow the deprecated keywords:
+ *
+ * GL_AMD_conservative_depth
+ * GL_ARB_conservative_depth
+ * GL_ARB_gpu_shader5
+ * GL_ARB_separate_shader_objects
+ * GL_ARB_tessellation_shader
+ * GL_ARB_transform_feedback3
+ * GL_ARB_uniform_buffer_object
+ *
+ * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
+ * allow layout with the deprecated keywords.
+ */
+ const bool relaxed_layout_qualifier_checking =
+ state->ARB_fragment_coord_conventions_enable;
+
+ const bool uses_deprecated_qualifier = qual->flags.q.attribute
+ || qual->flags.q.varying;
+ if (qual->has_layout() && uses_deprecated_qualifier) {
+ if (relaxed_layout_qualifier_checking) {
+ _mesa_glsl_warning(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ } else {
+ _mesa_glsl_error(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ }
+ }
+
+ /* Layout qualifiers for gl_FragDepth, which are enabled by extension
+ * AMD_conservative_depth.
+ */
+ int depth_layout_count = qual->flags.q.depth_any
+ + qual->flags.q.depth_greater
+ + qual->flags.q.depth_less
+ + qual->flags.q.depth_unchanged;
+ if (depth_layout_count > 0
+ && !state->AMD_conservative_depth_enable
+ && !state->ARB_conservative_depth_enable) {
+ _mesa_glsl_error(loc, state,
+ "extension GL_AMD_conservative_depth or "
+ "GL_ARB_conservative_depth must be enabled "
+ "to use depth layout qualifiers");
+ } else if (depth_layout_count > 0
+ && strcmp(var->name, "gl_FragDepth") != 0) {
+ _mesa_glsl_error(loc, state,
+ "depth layout qualifiers can be applied only to "
+ "gl_FragDepth");
+ } else if (depth_layout_count > 1
+ && strcmp(var->name, "gl_FragDepth") == 0) {
+ _mesa_glsl_error(loc, state,
+ "at most one depth layout qualifier can be applied to "
+ "gl_FragDepth");
+ }
+ if (qual->flags.q.depth_any)
+ var->data.depth_layout = ir_depth_layout_any;
+ else if (qual->flags.q.depth_greater)
+ var->data.depth_layout = ir_depth_layout_greater;
+ else if (qual->flags.q.depth_less)
+ var->data.depth_layout = ir_depth_layout_less;
+ else if (qual->flags.q.depth_unchanged)
+ var->data.depth_layout = ir_depth_layout_unchanged;
+ else
+ var->data.depth_layout = ir_depth_layout_none;
+
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(loc, state,
+ "uniform and shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform or shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ validate_matrix_layout_for_type(state, loc, var->type, var);
+ }
+
+ /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
+ * Inputs):
+ *
+ * "Fragment shaders also allow the following layout qualifier on in only
+ * (not with variable declarations)
+ * layout-qualifier-id
+ * early_fragment_tests
+ * [...]"
+ */
+ if (qual->flags.q.early_fragment_tests) {
+ _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
+ "valid in fragment shader input layout declaration.");
+ }
+ }
+
+ static void
+ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ bool is_parameter)
+ {
+ STATIC_ASSERT(sizeof(qual->flags.q) <= sizeof(qual->flags.i));
+
+ if (qual->flags.q.invariant) {
+ if (var->data.used) {
+ _mesa_glsl_error(loc, state,
+ "variable `%s' may not be redeclared "
+ "`invariant' after being used",
+ var->name);
+ } else {
+ var->data.invariant = 1;
+ }
+ }
+
+ if (qual->flags.q.precise) {
+ if (var->data.used) {
+ _mesa_glsl_error(loc, state,
+ "variable `%s' may not be redeclared "
+ "`precise' after being used",
+ var->name);
+ } else {
+ var->data.precise = 1;
+ }
+ }
+
+ if (qual->flags.q.subroutine && !qual->flags.q.uniform) {
+ _mesa_glsl_error(loc, state,
+ "`subroutine' may only be applied to uniforms, "
+ "subroutine type declarations, or function definitions");
+ }
+
+ if (qual->flags.q.constant || qual->flags.q.attribute
+ || qual->flags.q.uniform
+ || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT)))
+ var->data.read_only = 1;
+
+ if (qual->flags.q.centroid)
+ var->data.centroid = 1;
+
+ if (qual->flags.q.sample)
+ var->data.sample = 1;
+
+ /* Precision qualifiers do not hold any meaning in Desktop GLSL */
+ if (state->es_shader) {
+ var->data.precision =
+ select_gles_precision(qual->precision, var->type, state, loc);
+ }
+
+ if (qual->flags.q.patch)
+ var->data.patch = 1;
+
+ if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) {
+ var->type = glsl_type::error_type;
+ _mesa_glsl_error(loc, state,
+ "`attribute' variables may not be declared in the "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+
+ /* Disallow layout qualifiers which may only appear on layout declarations. */
+ if (qual->flags.q.prim_type) {
+ _mesa_glsl_error(loc, state,
+ "Primitive type may only be specified on GS input or output "
+ "layout declaration, not on variables.");
+ }
+
+ /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says:
+ *
+ * "However, the const qualifier cannot be used with out or inout."
+ *
+ * The same section of the GLSL 4.40 spec further clarifies this saying:
+ *
+ * "The const qualifier cannot be used with out or inout, or a
+ * compile-time error results."
+ */
+ if (is_parameter && qual->flags.q.constant && qual->flags.q.out) {
+ _mesa_glsl_error(loc, state,
+ "`const' may not be applied to `out' or `inout' "
+ "function parameters");
+ }
+
+ /* If there is no qualifier that changes the mode of the variable, leave
+ * the setting alone.
+ */
+ assert(var->data.mode != ir_var_temporary);
+ if (qual->flags.q.in && qual->flags.q.out)
+ var->data.mode = ir_var_function_inout;
+ else if (qual->flags.q.in)
+ var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in;
+ else if (qual->flags.q.attribute
+ || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT)))
+ var->data.mode = ir_var_shader_in;
+ else if (qual->flags.q.out)
+ var->data.mode = is_parameter ? ir_var_function_out : ir_var_shader_out;
+ else if (qual->flags.q.varying && (state->stage == MESA_SHADER_VERTEX))
+ var->data.mode = ir_var_shader_out;
+ else if (qual->flags.q.uniform)
+ var->data.mode = ir_var_uniform;
+ else if (qual->flags.q.buffer)
+ var->data.mode = ir_var_shader_storage;
+ else if (qual->flags.q.shared_storage)
+ var->data.mode = ir_var_shader_shared;
+
+ if (!is_parameter && is_varying_var(var, state->stage)) {
+ /* User-defined ins/outs are not permitted in compute shaders. */
+ if (state->stage == MESA_SHADER_COMPUTE) {
+ _mesa_glsl_error(loc, state,
+ "user-defined input and output variables are not "
+ "permitted in compute shaders");
+ }
+
+ /* This variable is being used to link data between shader stages (in
+ * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type
+ * that is allowed for such purposes.
+ *
+ * From page 25 (page 31 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "The varying qualifier can be used only with the data types
+ * float, vec2, vec3, vec4, mat2, mat3, and mat4, or arrays of
+ * these."
+ *
+ * This was relaxed in GLSL version 1.30 and GLSL ES version 3.00. From
+ * page 31 (page 37 of the PDF) of the GLSL 1.30 spec:
+ *
+ * "Fragment inputs can only be signed and unsigned integers and
+ * integer vectors, float, floating-point vectors, matrices, or
+ * arrays of these. Structures cannot be input."
+ *
+ * Similar text exists in the section on vertex shader outputs.
+ *
+ * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES
+ * 3.00 spec allows structs as well. Varying structs are also allowed
+ * in GLSL 1.50.
+ */
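+ /* Non-normative examples of the checks below:
+ *
+ *    varying vec3 normal;   // ok in GLSL 1.10
+ *    varying ivec2 coord;   // error before GLSL 1.30 / GLSL ES 3.00
+ *    out MyStruct data;     // struct varyings need GLSL 1.50 / GLSL ES 3.00
+ */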
+ switch (var->type->get_scalar_type()->base_type) {
+ case GLSL_TYPE_FLOAT:
+ /* Ok in all GLSL versions */
+ break;
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ if (state->is_version(130, 300))
+ break;
+ _mesa_glsl_error(loc, state,
+ "varying variables must be of base type float in %s",
+ state->get_version_string());
+ break;
+ case GLSL_TYPE_STRUCT:
+ if (state->is_version(150, 300))
+ break;
+ _mesa_glsl_error(loc, state,
+ "varying variables may not be of type struct");
+ break;
+ case GLSL_TYPE_DOUBLE:
+ break;
+ default:
+ _mesa_glsl_error(loc, state, "illegal type for a varying variable");
+ break;
+ }
+ }
+
+ if (state->all_invariant && (state->current_function == NULL)) {
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == ir_var_shader_out)
+ var->data.invariant = true;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if ((var->data.mode == ir_var_shader_in)
+ || (var->data.mode == ir_var_shader_out))
+ var->data.invariant = true;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == ir_var_shader_in)
+ var->data.invariant = true;
+ break;
+ case MESA_SHADER_COMPUTE:
+ /* Invariance isn't meaningful in compute shaders. */
+ break;
+ }
+ }
+
+ var->data.interpolation =
+ interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode,
+ state, loc);
+
+ /* Does the declaration use the deprecated 'attribute' or 'varying'
+ * keywords?
+ */
+ const bool uses_deprecated_qualifier = qual->flags.q.attribute
+ || qual->flags.q.varying;
+
+
+ /* Validate auxiliary storage qualifiers */
+
+ /* From section 4.3.4 of the GLSL 1.30 spec:
+ * "It is an error to use centroid in in a vertex shader."
+ *
+ * From section 4.3.4 of the GLSL ES 3.00 spec:
+ * "It is an error to use centroid in or interpolation qualifiers in
+ * a vertex shader input."
+ */
+
+ /* Section 4.3.6 of the GLSL 1.30 specification states:
+ * "It is an error to use centroid out in a fragment shader."
+ *
+ * The GL_ARB_shading_language_420pack extension specification states:
+ * "It is an error to use auxiliary storage qualifiers or interpolation
+ * qualifiers on an output in a fragment shader."
+ */
+ if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) {
+ _mesa_glsl_error(loc, state,
+ "sample qualifier may only be used on `in` or `out` "
+ "variables between shader stages");
+ }
+ if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) {
+ _mesa_glsl_error(loc, state,
+ "centroid qualifier may only be used with `in', "
+ "`out' or `varying' variables between shader stages");
+ }
+
+ if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
+ _mesa_glsl_error(loc, state,
+ "the shared storage qualifiers can only be used with "
+ "compute shaders");
+ }
+
+ apply_image_qualifier_to_variable(qual, var, state, loc);
+ }
+
+ /**
+ * Get the variable that is being redeclared by this declaration
+ *
+ * Semantic checks to verify the validity of the redeclaration are also
+ * performed. If semantic checks fail, a compilation error will be emitted via
+ * \c _mesa_glsl_error, but a non-\c NULL pointer will still be returned.
+ *
+ * \returns
+ * A pointer to an existing variable in the current scope if the declaration
+ * is a redeclaration, \c NULL otherwise.
+ */
+ static ir_variable *
+ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
+ struct _mesa_glsl_parse_state *state,
+ bool allow_all_redeclarations)
+ {
+ /* Check if this declaration is actually a re-declaration, either to
+ * resize an array or add qualifiers to an existing variable.
+ *
+ * This is allowed for variables in the current scope, or when at
+ * global scope (for built-ins in the implicit outer scope).
+ */
+ ir_variable *earlier = state->symbols->get_variable(var->name);
+ if (earlier == NULL ||
+ (state->current_function != NULL &&
+ !state->symbols->name_declared_this_scope(var->name))) {
+ return NULL;
+ }
+
+
+ /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec,
+ *
+ * "It is legal to declare an array without a size and then
+ * later re-declare the same name as an array of the same
+ * type and specify a size."
+ */
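+ /* Non-normative example:
+ *
+ *    float a[];    // unsized declaration
+ *    float a[4];   // legal redeclaration supplying the size
+ */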
+ if (earlier->type->is_unsized_array() && var->type->is_array()
+ && (var->type->fields.array == earlier->type->fields.array)) {
+ /* FINISHME: This doesn't match the qualifiers on the two
+ * FINISHME: declarations. It's not 100% clear whether this is
+ * FINISHME: required or not.
+ */
+
+ const unsigned size = unsigned(var->type->array_size());
+ check_builtin_array_max_size(var->name, size, loc, state);
+ if ((size > 0) && (size <= earlier->data.max_array_access)) {
+ _mesa_glsl_error(& loc, state, "array size must be > %u due to "
+ "previous access",
+ earlier->data.max_array_access);
+ }
+
+ earlier->type = var->type;
+ delete var;
+ var = NULL;
+ } else if ((state->ARB_fragment_coord_conventions_enable ||
+ state->is_version(150, 0))
+ && strcmp(var->name, "gl_FragCoord") == 0
+ && earlier->type == var->type
+ && var->data.mode == ir_var_shader_in) {
+ /* Allow redeclaration of gl_FragCoord for ARB_fcc layout
+ * qualifiers.
+ */
+ earlier->data.origin_upper_left = var->data.origin_upper_left;
+ earlier->data.pixel_center_integer = var->data.pixel_center_integer;
+
+ /* According to section 4.3.7 of the GLSL 1.30 spec,
+ * the following built-in variables can be redeclared with an
+ * interpolation qualifier:
+ * * gl_FrontColor
+ * * gl_BackColor
+ * * gl_FrontSecondaryColor
+ * * gl_BackSecondaryColor
+ * * gl_Color
+ * * gl_SecondaryColor
+ */
+ } else if (state->is_version(130, 0)
+ && (strcmp(var->name, "gl_FrontColor") == 0
+ || strcmp(var->name, "gl_BackColor") == 0
+ || strcmp(var->name, "gl_FrontSecondaryColor") == 0
+ || strcmp(var->name, "gl_BackSecondaryColor") == 0
+ || strcmp(var->name, "gl_Color") == 0
+ || strcmp(var->name, "gl_SecondaryColor") == 0)
+ && earlier->type == var->type
+ && earlier->data.mode == var->data.mode) {
+ earlier->data.interpolation = var->data.interpolation;
+
+ /* Layout qualifiers for gl_FragDepth. */
+ } else if ((state->AMD_conservative_depth_enable ||
+ state->ARB_conservative_depth_enable)
+ && strcmp(var->name, "gl_FragDepth") == 0
+ && earlier->type == var->type
+ && earlier->data.mode == var->data.mode) {
+
+ /* From the AMD_conservative_depth spec:
+ *
+ * "Within any shader, the first redeclarations of gl_FragDepth
+ * must appear before any use of gl_FragDepth."
+ */
+ if (earlier->data.used) {
+ _mesa_glsl_error(&loc, state,
+ "the first redeclaration of gl_FragDepth "
+ "must appear before any use of gl_FragDepth");
+ }
+
+ /* Prevent inconsistent redeclaration of depth layout qualifier. */
+ if (earlier->data.depth_layout != ir_depth_layout_none
+ && earlier->data.depth_layout != var->data.depth_layout) {
+ _mesa_glsl_error(&loc, state,
+ "gl_FragDepth: depth layout is declared here "
+ "as '%s, but it was previously declared as "
+ "'%s'",
+ depth_layout_string(var->data.depth_layout),
+ depth_layout_string(earlier->data.depth_layout));
+ }
+
+ earlier->data.depth_layout = var->data.depth_layout;
+
+ } else if (allow_all_redeclarations) {
+ if (earlier->data.mode != var->data.mode) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of `%s' with incorrect qualifiers",
+ var->name);
+ } else if (earlier->type != var->type) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of `%s' has incorrect type",
+ var->name);
+ }
+ } else {
+ _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
+ }
+
+ return earlier;
+ }
+
+ /**
+ * Generate the IR for an initializer in a variable declaration
+ */
+ ir_rvalue *
+ process_initializer(ir_variable *var, ast_declaration *decl,
+ ast_fully_specified_type *type,
+ exec_list *initializer_instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ ir_rvalue *result = NULL;
+
+ YYLTYPE initializer_loc = decl->initializer->get_location();
+
+ /* From page 24 (page 30 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "All uniform variables are read-only and are initialized either
+ * directly by an application via API commands, or indirectly by
+ * OpenGL."
+ */
+ if (var->data.mode == ir_var_uniform) {
+ state->check_version(120, 0, &initializer_loc,
+ "cannot initialize uniform %s",
+ var->name);
+ }
+
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ *
+ * "Buffer variables cannot have initializers."
+ */
+ if (var->data.mode == ir_var_shader_storage) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize buffer variable %s",
+ var->name);
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables [...] are initialized only through the
+ * OpenGL API; they cannot be declared with an initializer in a
+ * shader."
+ */
+ if (var->type->contains_opaque()) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize opaque variable %s",
+ var->name);
+ }
+
+ if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize %s shader input / %s %s",
+ _mesa_shader_stage_to_string(state->stage),
+ (state->stage == MESA_SHADER_VERTEX)
+ ? "attribute" : "varying",
+ var->name);
+ }
+
+ if (var->data.mode == ir_var_shader_out && state->current_function == NULL) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize %s shader output %s",
+ _mesa_shader_stage_to_string(state->stage),
+ var->name);
+ }
+
+ /* If the initializer is an ast_aggregate_initializer, recursively store
+ * type information from the LHS into it, so that its hir() function can do
+ * type checking.
+ */
+ if (decl->initializer->oper == ast_aggregate)
+ _mesa_ast_set_aggregate_type(var->type, decl->initializer);
+
+ ir_dereference *const lhs = new(state) ir_dereference_variable(var);
+ ir_rvalue *rhs = decl->initializer->hir(initializer_instructions, state);
+
+ /* Calculate the constant value if this is a const or uniform
+ * declaration.
+ *
+ * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says:
+ *
+ * "Declarations of globals without a storage qualifier, or with
+ * just the const qualifier, may include initializers, in which case
+ * they will be initialized before the first line of main() is
+ * executed. Such initializers must be a constant expression."
+ *
+ * The same section of the GLSL ES 3.00.4 spec has similar language.
+ */
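+ /* Non-normative examples:
+ *
+ *    const float x = 2.0 * 3.0;   // ok, constant expression
+ *    uniform float u;
+ *    const float y = u;           // error at global scope: not a constant expression
+ */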
+ if (type->qualifier.flags.q.constant
+ || type->qualifier.flags.q.uniform
+ || (state->es_shader && state->current_function == NULL)) {
+ ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
+ lhs, rhs, true);
+ if (new_rhs != NULL) {
+ rhs = new_rhs;
+
+ /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec
+ * says:
+ *
+ * "A constant expression is one of
+ *
+ * ...
+ *
+ * - an expression formed by an operator on operands that are
+ * all constant expressions, including getting an element of
+ * a constant array, or a field of a constant structure, or
+ * components of a constant vector. However, the sequence
+ * operator ( , ) and the assignment operators ( =, +=, ...)
+ * are not included in the operators that can create a
+ * constant expression."
+ *
+ * Section 12.43 (Sequence operator and constant expressions) says:
+ *
+ * "Should the following construct be allowed?
+ *
+ * float a[2,3];
+ *
+ * The expression within the brackets uses the sequence operator
+ * (',') and returns the integer 3 so the construct is declaring
+ * a single-dimensional array of size 3. In some languages, the
+ * construct declares a two-dimensional array. It would be
+ * preferable to make this construct illegal to avoid confusion.
+ *
+ * One possibility is to change the definition of the sequence
+ * operator so that it does not return a constant-expression and
+ * hence cannot be used to declare an array size.
+ *
+ * RESOLUTION: The result of a sequence operator is not a
+ * constant-expression."
+ *
+ * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec
+ * contains language almost identical to the section 4.3.3 in the
+ * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL
+ * versions.
+ */
+ ir_constant *constant_value = rhs->constant_expression_value();
+ if (!constant_value ||
+ (state->is_version(430, 300) &&
+ decl->initializer->has_sequence_subexpression())) {
+ const char *const variable_mode =
+ (type->qualifier.flags.q.constant)
+ ? "const"
+ : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
+
+ /* If ARB_shading_language_420pack is enabled, initializers of
+ * const-qualified local variables do not have to be constant
+ * expressions. Const-qualified global variables must still be
+ * initialized with constant expressions.
+ */
+ if (!state->has_420pack()
+ || state->current_function == NULL) {
+ _mesa_glsl_error(& initializer_loc, state,
+ "initializer of %s variable `%s' must be a "
+ "constant expression",
+ variable_mode,
+ decl->identifier);
+ if (var->type->is_numeric()) {
+ /* Reduce cascading errors. */
+ var->constant_value = type->qualifier.flags.q.constant
+ ? ir_constant::zero(state, var->type) : NULL;
+ }
+ }
+ } else {
+ rhs = constant_value;
+ var->constant_value = type->qualifier.flags.q.constant
+ ? constant_value : NULL;
+ }
+ } else {
+ if (var->type->is_numeric()) {
+ /* Reduce cascading errors. */
+ var->constant_value = type->qualifier.flags.q.constant
+ ? ir_constant::zero(state, var->type) : NULL;
+ }
+ }
+ }
+
+ if (rhs && !rhs->type->is_error()) {
+ bool temp = var->data.read_only;
+ if (type->qualifier.flags.q.constant)
+ var->data.read_only = false;
+
+ /* Never emit code to initialize a uniform.
+ */
+ const glsl_type *initializer_type;
+ if (!type->qualifier.flags.q.uniform) {
+ do_assignment(initializer_instructions, state,
+ NULL,
+ lhs, rhs,
+ &result, true,
+ true,
+ type->get_location());
+ initializer_type = result->type;
+ } else
+ initializer_type = rhs->type;
+
+ var->constant_initializer = rhs->constant_expression_value();
+ var->data.has_initializer = true;
+
+ /* If the declared variable is an unsized array, it must inherit
+ * its full type from the initializer. A declaration such as
+ *
+ * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0);
+ *
+ * becomes
+ *
+ * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0);
+ *
+ * The assignment generated in the if-statement (below) will also
+ * automatically handle this case for non-uniforms.
+ *
+ * If the declared variable is not an array, the types must
+ * already match exactly. As a result, the type assignment
+ * here can be done unconditionally. For non-uniforms the call
+ * to do_assignment can change the type of the initializer (via
+ * the implicit conversion rules). For uniforms the initializer
+ * must be a constant expression, and the type of that expression
+ * was validated above.
+ */
+ var->type = initializer_type;
+
+ var->data.read_only = temp;
+ }
+
+ return result;
+ }
+
+ static void
+ validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var,
+ unsigned num_vertices,
+ unsigned *size,
+ const char *var_category)
+ {
+ if (var->type->is_unsized_array()) {
+ /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says:
+ *
+ * All geometry shader input unsized array declarations will be
+ * sized by an earlier input layout qualifier, when present, as per
+ * the following table.
+ *
+ * Followed by a table mapping each allowed input layout qualifier to
+ * the corresponding input length.
+ *
+ * Similarly for tessellation control shader outputs.
+ */
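+ /* Non-normative example: with an earlier
+ *
+ *    layout(triangles) in;
+ *
+ * a declaration such as "in vec4 Color[];" is implicitly sized to 3.
+ */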
+ if (num_vertices != 0)
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ } else {
+ /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec
+ * includes the following examples of compile-time errors:
+ *
+ * // code sequence within one shader...
+ * in vec4 Color1[]; // size unknown
+ * ...Color1.length()...// illegal, length() unknown
+ * in vec4 Color2[2]; // size is 2
+ * ...Color1.length()...// illegal, Color1 still has no size
+ * in vec4 Color3[3]; // illegal, input sizes are inconsistent
+ * layout(lines) in; // legal, input size is 2, matching
+ * in vec4 Color4[3]; // illegal, contradicts layout
+ * ...
+ *
+ * To detect the case illustrated by Color3, we verify that the size of
+ * an explicitly-sized array matches the size of any previously declared
+ * explicitly-sized array. To detect the case illustrated by Color4, we
+ * verify that the size of an explicitly-sized array is consistent with
+ * any previously declared input layout.
+ */
+ if (num_vertices != 0 && var->type->length != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "%s size contradicts previously declared layout "
+ "(size is %u, but layout requires a size of %u)",
+ var_category, var->type->length, num_vertices);
+ } else if (*size != 0 && var->type->length != *size) {
+ _mesa_glsl_error(&loc, state,
+ "%s sizes are inconsistent (size is %u, but a "
+ "previous declaration has size %u)",
+ var_category, var->type->length, *size);
+ } else {
+ *size = var->type->length;
+ }
+ }
+ }
+
+ static void
+ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var)
+ {
+ unsigned num_vertices = 0;
+
+ if (state->tcs_output_vertices_specified) {
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices",
+ &num_vertices, false)) {
+ return;
+ }
+
+ if (num_vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", num_vertices);
+ return;
+ }
+ }
+
+ if (!var->type->is_array() && !var->data.patch) {
+ _mesa_glsl_error(&loc, state,
+ "tessellation control shader outputs must be arrays");
+
+ /* To avoid cascading failures, short circuit the checks below. */
+ return;
+ }
+
+ if (var->data.patch)
+ return;
+
+ validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+ &state->tcs_output_size,
+ "tessellation control shader output");
+ }
+
+ /**
+ * Do additional processing necessary for tessellation control/evaluation shader
+ * input declarations. This covers both interface block arrays and bare input
+ * variables.
+ */
+ static void
+ handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var)
+ {
+ if (!var->type->is_array() && !var->data.patch) {
+ _mesa_glsl_error(&loc, state,
+ "per-vertex tessellation shader inputs must be arrays");
+ /* Avoid cascading failures. */
+ return;
+ }
+
+ if (var->data.patch)
+ return;
+
+ /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */
+ if (var->type->is_unsized_array()) {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ state->Const.MaxPatchVertices);
+ }
+ }
+
+
+ /**
+ * Do additional processing necessary for geometry shader input declarations
+ * (this covers both interface blocks arrays and bare input variables).
+ */
+ static void
+ handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var)
+ {
+ unsigned num_vertices = 0;
+
+ if (state->gs_input_prim_type_specified) {
+ num_vertices = vertices_per_prim(state->in_qualifier->prim_type);
+ }
+
+ /* Geometry shader input variables must be arrays. Caller should have
+ * reported an error for this.
+ */
+ if (!var->type->is_array()) {
+ assert(state->error);
+
+ /* To avoid cascading failures, short circuit the checks below. */
+ return;
+ }
+
+ validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+ &state->gs_input_size,
+ "geometry shader input");
+ }
+
+ void
+ validate_identifier(const char *identifier, YYLTYPE loc,
+ struct _mesa_glsl_parse_state *state)
+ {
+ /* From page 15 (page 21 of the PDF) of the GLSL 1.10 spec,
+ *
+ * "Identifiers starting with "gl_" are reserved for use by
+ * OpenGL, and may not be declared in a shader as either a
+ * variable or a function."
+ */
+ if (is_gl_identifier(identifier)) {
+ _mesa_glsl_error(&loc, state,
+ "identifier `%s' uses reserved `gl_' prefix",
+ identifier);
+ } else if (strstr(identifier, "__")) {
+ /* From page 14 (page 20 of the PDF) of the GLSL 1.10
+ * spec:
+ *
+ * "In addition, all identifiers containing two
+ * consecutive underscores (__) are reserved as
+ * possible future keywords."
+ *
+ * The intention is that names containing __ are reserved for internal
+ * use by the implementation, and names prefixed with GL_ are reserved
+ * for use by Khronos. Names simply containing __ are dangerous to use,
+ * but should be allowed.
+ *
+ * A future version of the GLSL specification will clarify this.
+ */
+ _mesa_glsl_warning(&loc, state,
+ "identifier `%s' uses reserved `__' string",
+ identifier);
+ }
+ }
+
+ ir_rvalue *
+ ast_declarator_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ const struct glsl_type *decl_type;
+ const char *type_name = NULL;
+ ir_rvalue *result = NULL;
+ YYLTYPE loc = this->get_location();
+
+ /* From page 46 (page 52 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "To ensure that a particular output variable is invariant, it is
+ * necessary to use the invariant qualifier. It can either be used to
+ * qualify a previously declared variable as being invariant
+ *
+ * invariant gl_Position; // make existing gl_Position be invariant"
+ *
+ * In these cases the parser will set the 'invariant' flag in the declarator
+ * list, and the type will be NULL.
+ */
+ if (this->invariant) {
+ assert(this->type == NULL);
+
+ if (state->current_function != NULL) {
+ _mesa_glsl_error(& loc, state,
+ "all uses of `invariant' keyword must be at global "
+ "scope");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+ assert(decl->array_specifier == NULL);
+ assert(decl->initializer == NULL);
+
+ ir_variable *const earlier =
+ state->symbols->get_variable(decl->identifier);
+ if (earlier == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "undeclared variable `%s' cannot be marked "
+ "invariant", decl->identifier);
+ } else if (!is_varying_var(earlier, state->stage)) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' cannot be marked invariant; interfaces between "
+ "shader stages only.", decl->identifier);
+ } else if (earlier->data.used) {
+ _mesa_glsl_error(& loc, state,
+ "variable `%s' may not be redeclared "
+ "`invariant' after being used",
+ earlier->name);
+ } else {
+ earlier->data.invariant = true;
+ }
+ }
+
+ /* Invariant redeclarations do not have r-values.
+ */
+ return NULL;
+ }
+
+ if (this->precise) {
+ assert(this->type == NULL);
+
+ foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+ assert(decl->array_specifier == NULL);
+ assert(decl->initializer == NULL);
+
+ ir_variable *const earlier =
+ state->symbols->get_variable(decl->identifier);
+ if (earlier == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "undeclared variable `%s' cannot be marked "
+ "precise", decl->identifier);
+ } else if (state->current_function != NULL &&
+ !state->symbols->name_declared_this_scope(decl->identifier)) {
+ /* Note: we have to check if we're in a function, since
+ * builtins are treated as having come from another scope.
+ */
+ _mesa_glsl_error(& loc, state,
+ "variable `%s' from an outer scope may not be "
+ "redeclared `precise' in this scope",
+ earlier->name);
+ } else if (earlier->data.used) {
+ _mesa_glsl_error(& loc, state,
+ "variable `%s' may not be redeclared "
+ "`precise' after being used",
+ earlier->name);
+ } else {
+ earlier->data.precise = true;
+ }
+ }
+
+ /* Precise redeclarations do not have r-values either. */
+ return NULL;
+ }
+
+ assert(this->type != NULL);
+ assert(!this->invariant);
+ assert(!this->precise);
+
+ /* The type specifier may contain a structure definition. Process that
+ * before any of the variable declarations.
+ */
+ (void) this->type->specifier->hir(instructions, state);
+
+ decl_type = this->type->glsl_type(& type_name, state);
+
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ * "Buffer variables may only be declared inside interface blocks
+ * (section 4.3.9 “Interface Blocks”), which are then referred to as
+ * shader storage blocks. It is a compile-time error to declare buffer
+ * variables at global scope (outside a block)."
+ */
+ if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
+ _mesa_glsl_error(&loc, state,
+ "buffer variables cannot be declared outside "
+ "interface blocks");
+ }
+
+ /* An offset-qualified atomic counter declaration sets the default
+ * offset for the next declaration within the same atomic counter
+ * buffer.
+ */
+ if (decl_type && decl_type->contains_atomic()) {
+ if (type->qualifier.flags.q.explicit_binding &&
+ type->qualifier.flags.q.explicit_offset) {
+ unsigned qual_binding;
+ unsigned qual_offset;
+ if (process_qualifier_constant(state, &loc, "binding",
+ type->qualifier.binding,
+ &qual_binding)
+ && process_qualifier_constant(state, &loc, "offset",
+ type->qualifier.offset,
+ &qual_offset)) {
+ state->atomic_counter_offsets[qual_binding] = qual_offset;
+ }
+ }
+ }
+
+ if (this->declarations.is_empty()) {
+ /* If there is no structure involved in the program text, there are three
+ * possible scenarios:
+ *
+ * - The program text contained something like 'vec4;'. This is an
+ * empty declaration. It is valid but weird. Emit a warning.
+ *
+ * - The program text contained something like 'S;' and 'S' is not the
+ * name of a known structure type. This is both invalid and weird.
+ * Emit an error.
+ *
+ * - The program text contained something like 'mediump float;'
+ * when the programmer probably meant 'precision mediump
+ * float;' Emit a warning with a description of what they
+ * probably meant to do.
+ *
+ * Note that if decl_type is NULL and there is a structure involved,
+ * there must have been some sort of error with the structure. In this
+ * case we assume that an error was already generated on this line of
+ * code for the structure. There is no need to generate an additional,
+ * confusing error.
+ */
+ assert(this->type->specifier->structure == NULL || decl_type != NULL
+ || state->error);
+
+ if (decl_type == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "invalid type `%s' in empty declaration",
+ type_name);
+ } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) {
+ /* Empty atomic counter declarations are allowed and useful
+ * to set the default offset qualifier.
+ */
+ return NULL;
+ } else if (this->type->qualifier.precision != ast_precision_none) {
+ if (this->type->specifier->structure != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers can't be applied "
+ "to structures");
+ } else {
+ static const char *const precision_names[] = {
+ "highp",
+ "highp",
+ "mediump",
+ "lowp"
+ };
+
+ _mesa_glsl_warning(&loc, state,
+ "empty declaration with precision qualifier, "
+ "to set the default precision, use "
+ "`precision %s %s;'",
+ precision_names[this->type->qualifier.precision],
+ type_name);
+ }
+ } else if (this->type->specifier->structure == NULL) {
+ _mesa_glsl_warning(&loc, state, "empty declaration");
+ }
+ }
+
+ foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+ const struct glsl_type *var_type;
+ ir_variable *var;
+ const char *identifier = decl->identifier;
+ /* FINISHME: Emit a warning if a variable declaration shadows a
+ * FINISHME: declaration at a higher scope.
+ */
+
+ if ((decl_type == NULL) || decl_type->is_void()) {
+ if (type_name != NULL) {
+ _mesa_glsl_error(& loc, state,
+ "invalid type `%s' in declaration of `%s'",
+ type_name, decl->identifier);
+ } else {
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ decl->identifier);
+ }
+ continue;
+ }
+
+ if (this->type->qualifier.flags.q.subroutine) {
+ const glsl_type *t;
+ const char *name;
+
+ t = state->symbols->get_type(this->type->specifier->type_name);
+ if (!t)
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ decl->identifier);
+ name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier);
+
+ identifier = name;
+
+ }
+ var_type = process_array_type(&loc, decl_type, decl->array_specifier,
+ state);
+
+ var = new(ctx) ir_variable(var_type, identifier, ir_var_auto);
+
+ /* The 'varying in' and 'varying out' qualifiers can only be used with
+ * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support
+ * yet.
+ */
+ if (this->type->qualifier.flags.q.varying) {
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(& loc, state,
+ "`varying in' qualifier in declaration of "
+ "`%s' only valid for geometry shaders using "
+ "ARB_geometry_shader4 or EXT_geometry_shader4",
+ decl->identifier);
+ } else if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(& loc, state,
+ "`varying out' qualifier in declaration of "
+ "`%s' only valid for geometry shaders using "
+ "ARB_geometry_shader4 or EXT_geometry_shader4",
+ decl->identifier);
+ }
+ }
+
+ /* From page 22 (page 28 of the PDF) of the GLSL 1.10 specification;
+ *
+ * "Global variables can only use the qualifiers const,
+ * attribute, uniform, or varying. Only one may be
+ * specified.
+ *
+ * Local variables can only use the qualifier const."
+ *
+ * This is relaxed in GLSL 1.30 and GLSL ES 3.00. It is also relaxed by
+ * any extension that adds the 'layout' keyword.
+ */
+ if (!state->is_version(130, 300)
+ && !state->has_explicit_attrib_location()
+ && !state->has_separate_shader_objects()
+ && !state->ARB_fragment_coord_conventions_enable) {
+ if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(& loc, state,
+ "`out' qualifier in declaration of `%s' "
+ "only valid for function parameters in %s",
+ decl->identifier, state->get_version_string());
+ }
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(& loc, state,
+ "`in' qualifier in declaration of `%s' "
+ "only valid for function parameters in %s",
+ decl->identifier, state->get_version_string());
+ }
+ /* FINISHME: Test for other invalid qualifiers. */
+ }
+
+ apply_type_qualifier_to_variable(& this->type->qualifier, var, state,
+ & loc, false);
+ apply_layout_qualifier_to_variable(&this->type->qualifier, var, state,
+ &loc);
+
+ if (this->type->qualifier.flags.q.invariant) {
+ if (!is_varying_var(var, state->stage)) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' cannot be marked invariant; interfaces between "
+ "shader stages only", var->name);
+ }
+ }
+
+ if (state->current_function != NULL) {
+ const char *mode = NULL;
+ const char *extra = "";
+
+ /* There is no need to check for 'inout' here because the parser will
+ * only allow that in function parameter lists.
+ */
+ if (this->type->qualifier.flags.q.attribute) {
+ mode = "attribute";
+ } else if (this->type->qualifier.flags.q.subroutine) {
+ mode = "subroutine uniform";
+ } else if (this->type->qualifier.flags.q.uniform) {
+ mode = "uniform";
+ } else if (this->type->qualifier.flags.q.varying) {
+ mode = "varying";
+ } else if (this->type->qualifier.flags.q.in) {
+ mode = "in";
+ extra = " or in function parameter list";
+ } else if (this->type->qualifier.flags.q.out) {
+ mode = "out";
+ extra = " or in function parameter list";
+ }
+
+ if (mode) {
+ _mesa_glsl_error(& loc, state,
+ "%s variable `%s' must be declared at "
+ "global scope%s",
+ mode, var->name, extra);
+ }
+ } else if (var->data.mode == ir_var_shader_in) {
+ var->data.read_only = true;
+
+ if (state->stage == MESA_SHADER_VERTEX) {
+ bool error_emitted = false;
+
+ /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Vertex shader inputs can only be float, floating-point
+ * vectors, matrices, signed and unsigned integers and integer
+ * vectors. Vertex shader inputs can also form arrays of these
+ * types, but not structures."
+ *
+ * From page 31 (page 27 of the PDF) of the GLSL 1.30 spec:
+ *
+ * "Vertex shader inputs can only be float, floating-point
+ * vectors, matrices, signed and unsigned integers and integer
+ * vectors. They cannot be arrays or structures."
+ *
+ * From page 23 (page 29 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "The attribute qualifier can be used only with float,
+ * floating-point vectors, and matrices. Attribute variables
+ * cannot be declared as arrays or structures."
+ *
+ * From page 33 (page 39 of the PDF) of the GLSL ES 3.00 spec:
+ *
+ * "Vertex shader inputs can only be float, floating-point
+ * vectors, matrices, signed and unsigned integers and integer
+ * vectors. Vertex shader inputs cannot be arrays or
+ * structures."
+ */
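+ /* Non-normative examples of the checks below:
+ *
+ *    in vec4 position;    // ok
+ *    in bool flag;        // error: boolean attribute
+ *    in vec4 rows[4];     // array input requires GLSL 1.50
+ */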
+ const glsl_type *check_type = var->type->without_array();
+
+ switch (check_type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ break;
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ if (state->is_version(120, 300))
+ break;
+ case GLSL_TYPE_DOUBLE:
+ if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable))
+ break;
+ /* FALLTHROUGH */
+ default:
+ _mesa_glsl_error(& loc, state,
+ "vertex shader input / attribute cannot have "
+ "type %s`%s'",
+ var->type->is_array() ? "array of " : "",
+ check_type->name);
+ error_emitted = true;
+ }
+
+ if (!error_emitted && var->type->is_array() &&
+ !state->check_version(150, 0, &loc,
+ "vertex shader input / attribute "
+ "cannot have array type")) {
+ error_emitted = true;
+ }
+ } else if (state->stage == MESA_SHADER_GEOMETRY) {
+ /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+ *
+ * Geometry shader input variables get the per-vertex values
+ * written out by vertex shader output variables of the same
+ * names. Since a geometry shader operates on a set of
+ * vertices, each input varying variable (or input block, see
+ * interface blocks below) needs to be declared as an array.
+ */
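+ /* Illustrative sketch: a vertex shader "out vec3 normal;" is read by the
+ * geometry shader as "in vec3 normal[];", so a non-array declaration such
+ * as "in vec3 normal;" is rejected below.
+ */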
+ if (!var->type->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "geometry shader inputs must be arrays");
+ }
+
+ handle_geometry_shader_input_decl(state, loc, var);
+ } else if (state->stage == MESA_SHADER_FRAGMENT) {
+ /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec:
+ *
+ * It is a compile-time error to declare a fragment shader
+ * input with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * An array of arrays
+ * * An array of structures
+ * * A structure containing an array
+ * * A structure containing a structure
+ */
+ if (state->es_shader) {
+ const glsl_type *check_type = var->type->without_array();
+ if (check_type->is_boolean() ||
+ check_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input cannot have type %s",
+ check_type->name);
+ }
+ if (var->type->is_array() &&
+ var->type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "%s shader output "
+ "cannot have an array of arrays",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (var->type->is_array() &&
+ var->type->fields.array->is_record()) {
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input "
+ "cannot have an array of structs");
+ }
+ if (var->type->is_record()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ if (var->type->fields.structure[i].type->is_array() ||
+ var->type->fields.structure[i].type->is_record())
+ _mesa_glsl_error(&loc, state,
+ "fragement shader input cannot have "
+ "a struct that contains an "
+ "array or struct");
+ }
+ }
+ }
+ } else if (state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) {
+ handle_tess_shader_input_decl(state, loc, var);
+ }
+ } else if (var->data.mode == ir_var_shader_out) {
+ const glsl_type *check_type = var->type->without_array();
+
+ /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+ *
+ * It is a compile-time error to declare a vertex, tessellation
+ * evaluation, tessellation control, or geometry shader output
+ * that contains any of the following:
+ *
+ * * A Boolean type (bool, bvec2 ...)
+ * * An opaque type
+ */
+ if (check_type->is_boolean() || check_type->contains_opaque())
+ _mesa_glsl_error(&loc, state,
+ "%s shader output cannot have type %s",
+ _mesa_shader_stage_to_string(state->stage),
+ check_type->name);
+
+ /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+ *
+ * It is a compile-time error to declare a fragment shader output
+ * that contains any of the following:
+ *
+ * * A Boolean type (bool, bvec2 ...)
+ * * A double-precision scalar or vector (double, dvec2 ...)
+ * * An opaque type
+ * * Any matrix type
+ * * A structure
+ */
+ if (state->stage == MESA_SHADER_FRAGMENT) {
+ if (check_type->is_record() || check_type->is_matrix())
+ _mesa_glsl_error(&loc, state,
+ "fragment shader output "
+ "cannot have struct or matrix type");
+ switch (check_type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ break;
+ default:
+ _mesa_glsl_error(&loc, state,
+ "fragment shader output cannot have "
+ "type %s", check_type->name);
+ }
+ }
+
+ /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec:
+ *
+ * It is a compile-time error to declare a vertex shader output
+ * with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * An array of arrays
+ * * An array of structures
+ * * A structure containing an array
+ * * A structure containing a structure
+ *
+ * It is a compile-time error to declare a fragment shader output
+ * with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * A matrix
+ * * A structure
+ * * An array of arrays
+ */
+ if (state->es_shader) {
+ if (var->type->is_array() &&
+ var->type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "%s shader output "
+ "cannot have an array of arrays",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (state->stage == MESA_SHADER_VERTEX) {
+ if (var->type->is_array() &&
+ var->type->fields.array->is_record()) {
+ _mesa_glsl_error(&loc, state,
+ "vertex shader output "
+ "cannot have an array of structs");
+ }
+ if (var->type->is_record()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ if (var->type->fields.structure[i].type->is_array() ||
+ var->type->fields.structure[i].type->is_record())
+ _mesa_glsl_error(&loc, state,
+ "vertex shader output cannot have a "
+ "struct that contains an "
+ "array or struct");
+ }
+ }
+ }
+ }
+
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ handle_tess_ctrl_shader_output_decl(state, loc, var);
+ }
+ } else if (var->type->contains_subroutine()) {
+ /* declare subroutine uniforms as hidden */
+ var->data.how_declared = ir_var_hidden;
+ }
+
+ /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES,
+ * so must integer vertex outputs.
+ *
+ * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec:
+ * "Fragment shader inputs that are signed or unsigned integers or
+ * integer vectors must be qualified with the interpolation qualifier
+ * flat."
+ *
+ * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec:
+ * "Fragment shader inputs that are, or contain, signed or unsigned
+ * integers or integer vectors must be qualified with the
+ * interpolation qualifier flat."
+ *
+ * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec:
+ * "Vertex shader outputs that are, or contain, signed or unsigned
+ * integers or integer vectors must be qualified with the
+ * interpolation qualifier flat."
+ *
+ * Note that prior to GLSL 1.50, this requirement applied to vertex
+ * outputs rather than fragment inputs. That creates problems in the
+ * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all
+ * desktop GL shaders. For GLSL ES shaders, we follow the spec and
+ * apply the restriction to both vertex outputs and fragment inputs.
+ *
+ * Note also that the desktop GLSL specs are missing the text "or
+ * contain"; this is presumably an oversight, since there is no
+ * reasonable way to interpolate a fragment shader input that contains
+ * an integer.
+ */
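+ /* Illustrative sketch: in a GLSL 1.50 fragment shader, "in ivec2 texel;"
+ * must instead be written "flat in ivec2 texel;".
+ */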
+ if (state->is_version(130, 300) &&
+ var->type->contains_integer() &&
+ var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+ ((state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_in)
+ || (state->stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_out
+ && state->es_shader))) {
+ const char *var_type = (state->stage == MESA_SHADER_VERTEX) ?
+ "vertex output" : "fragment input";
+ _mesa_glsl_error(&loc, state, "if a %s is (or contains) "
+ "an integer, then it must be qualified with 'flat'",
+ var_type);
+ }
+
+ /* Double fragment inputs must be qualified with 'flat'. */
+ if (var->type->contains_double() &&
+ var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+ state->stage == MESA_SHADER_FRAGMENT &&
+ var->data.mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
+ "a double, then it must be qualified with 'flat'",
+ var_type);
+ }
+
+ /* Interpolation qualifiers cannot be applied to 'centroid' and
+ * 'centroid varying'.
+ *
+ * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+ * "interpolation qualifiers may only precede the qualifiers in,
+ * centroid in, out, or centroid out in a declaration. They do not apply
+ * to the deprecated storage qualifiers varying or centroid varying."
+ *
+ * These deprecated storage qualifiers do not exist in GLSL ES 3.00.
+ */
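+ /* Illustrative sketch: "smooth centroid varying vec4 color;" is rejected
+ * here, while "smooth centroid out vec4 color;" is not.
+ */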
+ if (state->is_version(130, 0)
+ && this->type->qualifier.has_interpolation()
+ && this->type->qualifier.flags.q.varying) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+ const char *s;
+ if (this->type->qualifier.flags.q.centroid)
+ s = "centroid varying";
+ else
+ s = "varying";
+
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to the "
+ "deprecated storage qualifier '%s'", i, s);
+ }
+
+
+ /* Interpolation qualifiers can only apply to vertex shader outputs and
+ * fragment shader inputs.
+ *
+ * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+ * "Outputs from a vertex shader (out) and inputs to a fragment
+ * shader (in) can be further qualified with one or more of these
+ * interpolation qualifiers"
+ *
+ * From page 31 (page 37 of the PDF) of the GLSL ES 3.00 spec:
+ * "These interpolation qualifiers may only precede the qualifiers
+ * in, centroid in, out, or centroid out in a declaration. They do
+ * not apply to inputs into a vertex shader or outputs from a
+ * fragment shader."
+ */
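+ /* Illustrative sketch: "flat in ivec4 index;" in a vertex shader and
+ * "noperspective out vec4 color;" in a fragment shader are both rejected
+ * below.
+ */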
+ if (state->is_version(130, 300)
+ && this->type->qualifier.has_interpolation()) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to vertex "
+ "shader inputs", i);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to fragment "
+ "shader outputs", i);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+
+ /* From section 4.3.4 of the GLSL 4.00 spec:
+ * "Input variables may not be declared using the patch in qualifier
+ * in tessellation control or geometry shaders."
+ *
+ * From section 4.3.6 of the GLSL 4.00 spec:
+ * "It is an error to use patch out in a vertex, tessellation
+ * evaluation, or geometry shader."
+ *
+ * This doesn't explicitly forbid using them in a fragment shader, but
+ * that's probably just an oversight.
+ */
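+ /* Illustrative sketch: "patch in vec4 data;" is accepted only in a
+ * tessellation evaluation shader, and "patch out vec4 data;" only in a
+ * tessellation control shader.
+ */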
+ if (state->stage != MESA_SHADER_TESS_EVAL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.in) {
+
+ _mesa_glsl_error(&loc, state, "'patch in' can only be used in a "
+ "tessellation evaluation shader");
+ }
+
+ if (state->stage != MESA_SHADER_TESS_CTRL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.out) {
+
+ _mesa_glsl_error(&loc, state, "'patch out' can only be used in a "
+ "tessellation control shader");
+ }
+
+ /* Precision qualifiers exist only in GLSL versions 1.00 and >= 1.30.
+ */
+ if (this->type->qualifier.precision != ast_precision_none) {
+ state->check_precision_qualifiers_allowed(&loc);
+ }
+
+
+ /* If a precision qualifier is allowed on a type, it is allowed on
+ * an array of that type.
+ */
+ if (!(this->type->qualifier.precision == ast_precision_none
+ || precision_qualifier_allowed(var->type->without_array()))) {
+
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers apply only to floating point"
+ ", integer and opaque types");
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "[Opaque types] can only be declared as function
+ * parameters or uniform-qualified variables."
+ */
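+ /* Illustrative sketch: a global "sampler2D tex;" is rejected here; it
+ * must be declared "uniform sampler2D tex;".
+ */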
+ if (var_type->contains_opaque() &&
+ !this->type->qualifier.flags.q.uniform) {
+ _mesa_glsl_error(&loc, state,
+ "opaque variables must be declared uniform");
+ }
+
+ /* Process the initializer and add its instructions to a temporary
+ * list. This list will be added to the instruction stream (below) after
+ * the declaration is added. This is done because in some cases (such as
+ * redeclarations) the declaration may not actually be added to the
+ * instruction stream.
+ */
+ exec_list initializer_instructions;
+
+ /* Examine var name here since var may get deleted in the next call */
+ bool var_is_gl_id = is_gl_identifier(var->name);
+
+ ir_variable *earlier =
+ get_variable_being_redeclared(var, decl->get_location(), state,
+ false /* allow_all_redeclarations */);
+ if (earlier != NULL) {
+ if (var_is_gl_id &&
+ earlier->data.how_declared == ir_var_declared_in_block) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' has already been redeclared using "
+ "gl_PerVertex", earlier->name);
+ }
+ earlier->data.how_declared = ir_var_declared_normally;
+ }
+
+ if (decl->initializer != NULL) {
+ result = process_initializer((earlier == NULL) ? var : earlier,
+ decl, this->type,
+ &initializer_instructions, state);
+ } else {
+ validate_array_dimensions(var_type, state, &loc);
+ }
+
+ /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "It is an error to write to a const variable outside of
+ * its declaration, so they must be initialized when
+ * declared."
+ */
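+ /* Illustrative sketch: "const float scale;" is rejected below, while
+ * "const float scale = 2.0;" is accepted.
+ */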
+ if (this->type->qualifier.flags.q.constant && decl->initializer == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "const declaration of `%s' must be initialized",
+ decl->identifier);
+ }
+
+ if (state->es_shader) {
+ const glsl_type *const t = (earlier == NULL)
+ ? var->type : earlier->type;
+
+ if (t->is_unsized_array())
+ /* Section 10.17 of the GLSL ES 1.00 specification states that
+ * unsized array declarations have been removed from the language.
+ * Arrays that are sized using an initializer are still explicitly
+ * sized. However, GLSL ES 1.00 does not allow array
+ * initializers. That is only allowed in GLSL ES 3.00.
+ *
+ * Section 4.1.9 (Arrays) of the GLSL ES 3.00 spec says:
+ *
+ * "An array type can also be formed without specifying a size
+ * if the definition includes an initializer:
+ *
+ * float x[] = float[2] (1.0, 2.0); // declares an array of size 2
+ * float y[] = float[] (1.0, 2.0, 3.0); // declares an array of size 3
+ *
+ * float a[5];
+ * float b[] = a;"
+ */
+ _mesa_glsl_error(& loc, state,
+ "unsized array declarations are not allowed in "
+ "GLSL ES");
+ }
+
+ /* If the declaration is not a redeclaration, there are a few additional
+ * semantic checks that must be applied. In addition, the variable that was
+ * created for the declaration should be added to the IR stream.
+ */
+ if (earlier == NULL) {
+ validate_identifier(decl->identifier, loc, state);
+
+ /* Add the variable to the symbol table. Note that the initializer's
+ * IR was already processed earlier (though it hasn't been emitted
+ * yet), without the variable in scope.
+ *
+ * This differs from most C-like languages, but it follows the GLSL
+ * specification. From page 28 (page 34 of the PDF) of the GLSL 1.50
+ * spec:
+ *
+ * "Within a declaration, the scope of a name starts immediately
+ * after the initializer if present or immediately after the name
+ * being declared if not."
+ */
+ if (!state->symbols->add_variable(var)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "name `%s' already taken in the "
+ "current scope", decl->identifier);
+ continue;
+ }
+
+ /* Push the variable declaration to the top. It means that all the
+ * variable declarations will appear in a funny last-to-first order,
+ * but otherwise we run into trouble if a function is prototyped, a
+ * global var is declared, then the function is defined with usage of
+ * the global var. See glslparsertest's CorrectModule.frag.
+ */
+ instructions->push_head(var);
+ }
+
+ instructions->append_list(&initializer_instructions);
+ }
+
+
+ /* Generally, variable declarations do not have r-values. However,
+ * one is used for the declaration in
+ *
+ * while (bool b = some_condition()) {
+ * ...
+ * }
+ *
+ * so we return the rvalue from the last seen declaration here.
+ */
+ return result;
+ }
+
+
+ ir_rvalue *
+ ast_parameter_declarator::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ const struct glsl_type *type;
+ const char *name = NULL;
+ YYLTYPE loc = this->get_location();
+
+ type = this->type->glsl_type(& name, state);
+
+ if (type == NULL) {
+ if (name != NULL) {
+ _mesa_glsl_error(& loc, state,
+ "invalid type `%s' in declaration of `%s'",
+ name, this->identifier);
+ } else {
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ this->identifier);
+ }
+
+ type = glsl_type::error_type;
+ }
+
+ /* From page 62 (page 68 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Functions that accept no input arguments need not use void in the
+ * argument list because prototypes (or definitions) are required and
+ * therefore there is no ambiguity when an empty argument list "( )" is
+ * declared. The idiom "(void)" as a parameter list is provided for
+ * convenience."
+ *
+ * Placing this check here prevents a void parameter being set up
+ * for a function, which avoids tripping up checks for main taking
+ * parameters and lookups of an unnamed symbol.
+ */
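+ /* Illustrative sketch: "float f()" and "float f(void)" both declare a
+ * zero-argument function, but "float f(void x)" names a void parameter
+ * and is rejected below.
+ */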
+ if (type->is_void()) {
+ if (this->identifier != NULL)
+ _mesa_glsl_error(& loc, state,
+ "named parameter cannot have type `void'");
+
+ is_void = true;
+ return NULL;
+ }
+
+ if (formal_parameter && (this->identifier == NULL)) {
+ _mesa_glsl_error(& loc, state, "formal parameter lacks a name");
+ return NULL;
+ }
+
+ /* This only handles "vec4 foo[..]". The earlier specifier->glsl_type(...)
+ * call already handled the "vec4[..] foo" case.
+ */
+ type = process_array_type(&loc, type, this->array_specifier, state);
+
+ if (!type->is_error() && type->is_unsized_array()) {
+ _mesa_glsl_error(&loc, state, "arrays passed as parameters must have "
+ "a declared size");
+ type = glsl_type::error_type;
+ }
+
+ is_void = false;
+ ir_variable *var = new(ctx)
+ ir_variable(type, this->identifier, ir_var_function_in);
+
+ /* Apply any specified qualifiers to the parameter declaration. Note that
+ * for function parameters the default mode is 'in'.
+ */
+ apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc,
+ true);
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables cannot be treated as l-values; hence cannot
+ * be used as out or inout function parameters, nor can they be
+ * assigned into."
+ */
+ if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out)
+ && type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state, "out and inout parameters cannot "
+ "contain opaque variables");
+ type = glsl_type::error_type;
+ }
+
+ /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "When calling a function, expressions that do not evaluate to
+ * l-values cannot be passed to parameters declared as out or inout."
+ *
+ * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "Other binary or unary expressions, non-dereferenced arrays,
+ * function names, swizzles with repeated fields, and constants
+ * cannot be l-values."
+ *
+ * So for GLSL 1.10, passing an array as an out or inout parameter is not
+ * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES.
+ */
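+ /* Illustrative sketch: "void f(out float a[4]);" is rejected in GLSL 1.10
+ * but accepted in GLSL 1.20 and in GLSL ES.
+ */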
+ if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out)
+ && type->is_array()
+ && !state->check_version(120, 100, &loc,
+ "arrays cannot be out or inout parameters")) {
+ type = glsl_type::error_type;
+ }
+
+ instructions->push_tail(var);
+
+ /* Parameter declarations do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ void
+ ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters,
+ bool formal,
+ exec_list *ir_parameters,
+ _mesa_glsl_parse_state *state)
+ {
+ ast_parameter_declarator *void_param = NULL;
+ unsigned count = 0;
+
+ foreach_list_typed (ast_parameter_declarator, param, link, ast_parameters) {
+ param->formal_parameter = formal;
+ param->hir(ir_parameters, state);
+
+ if (param->is_void)
+ void_param = param;
+
+ count++;
+ }
+
+ if ((void_param != NULL) && (count > 1)) {
+ YYLTYPE loc = void_param->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "`void' parameter must be only parameter");
+ }
+ }
+
+
+ void
+ emit_function(_mesa_glsl_parse_state *state, ir_function *f)
+ {
+ /* IR invariants disallow function declarations or definitions
+ * nested within other function definitions. But there is no
+ * requirement about the relative order of function declarations
+ * and definitions with respect to one another. So simply insert
+ * the new ir_function block at the end of the toplevel instruction
+ * list.
+ */
+ state->toplevel_ir->push_tail(f);
+ }
+
+
+ ir_rvalue *
+ ast_function::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+ ir_function *f = NULL;
+ ir_function_signature *sig = NULL;
+ exec_list hir_parameters;
+ YYLTYPE loc = this->get_location();
+
+ const char *const name = identifier;
+
+ /* New functions are always added to the top-level IR instruction stream,
+ * so this instruction list pointer is ignored. See also emit_function
+ * (called below).
+ */
+ (void) instructions;
+
+ /* From page 21 (page 27 of the PDF) of the GLSL 1.20 spec,
+ *
+ * "Function declarations (prototypes) cannot occur inside of functions;
+ * they must be at global scope, or for the built-in functions, outside
+ * the global scope."
+ *
+ * From page 27 (page 33 of the PDF) of the GLSL ES 1.00.16 spec,
+ *
+ * "User defined functions may only be defined within the global scope."
+ *
+ * Note that this language does not appear in GLSL 1.10.
+ */
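+ /* Illustrative sketch: declaring a prototype such as "float helper(float);"
+ * inside main() is rejected for GLSL >= 1.20 and for GLSL ES.
+ */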
+ if ((state->current_function != NULL) &&
+ state->is_version(120, 100)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "declaration of function `%s' not allowed within "
+ "function body", name);
+ }
+
+ validate_identifier(name, this->get_location(), state);
+
+ /* Convert the list of function parameters to HIR now so that they can be
+ * used below to compare this function's signature with previously seen
+ * signatures for functions with the same name.
+ */
+ ast_parameter_declarator::parameters_to_hir(& this->parameters,
+ is_definition,
+ & hir_parameters, state);
+
+ const char *return_type_name;
+ const glsl_type *return_type =
+ this->return_type->glsl_type(& return_type_name, state);
+
+ if (!return_type) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function `%s' has undeclared return type `%s'",
+ name, return_type_name);
+ return_type = glsl_type::error_type;
+ }
+
+ /* ARB_shader_subroutine states:
+ * "Subroutine declarations cannot be prototyped. It is an error to prepend
+ * subroutine(...) to a function declaration."
+ */
+ if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function declaration `%s' cannot have subroutine prepended",
+ name);
+ }
+
+ /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
+ * "No qualifier is allowed on the return type of a function."
+ */
+ if (this->return_type->has_qualifiers(state)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "function `%s' return type has qualifiers", name);
+ }
+
+ /* Section 6.1 (Function Definitions) of the GLSL 1.20 spec says:
+ *
+ * "Arrays are allowed as arguments and as the return type. In both
+ * cases, the array must be explicitly sized."
+ */
+ if (return_type->is_unsized_array()) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "function `%s' return type array must be explicitly "
+ "sized", name);
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "[Opaque types] can only be declared as function parameters
+ * or uniform-qualified variables."
+ */
+ if (return_type->contains_opaque()) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function `%s' return type can't contain an opaque type",
+ name);
+ }
+
+ /* Create an ir_function if one doesn't already exist. */
+ f = state->symbols->get_function(name);
+ if (f == NULL) {
+ f = new(ctx) ir_function(name);
+ if (!this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_function(f)) {
+ /* This function name shadows a non-function use of the same name. */
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "function name `%s' conflicts with "
+ "non-function", name);
+ return NULL;
+ }
+ }
+ emit_function(state, f);
+ }
+
+ /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71:
+ *
+ * "A shader cannot redefine or overload built-in functions."
+ *
+ * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions":
+ *
+ * "User code can overload the built-in functions but cannot redefine
+ * them."
+ */
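+ /* Illustrative sketch: in GLSL ES 3.00, any user function sharing a name
+ * with a built-in, e.g. "float clamp(float x) { ... }", is rejected below;
+ * the lookup is by name only, so overloads are rejected as well.
+ */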
+ if (state->es_shader && state->language_version >= 300) {
+ /* Local shader has no exact candidates; check the built-ins. */
+ _mesa_glsl_initialize_builtin_functions();
+ if (_mesa_glsl_find_builtin_function_by_name(name)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "A shader cannot redefine or overload built-in "
+ "function `%s' in GLSL ES 3.00", name);
+ return NULL;
+ }
+ }
+
+ /* Verify that this function's signature either doesn't match a previously
+ * seen signature for a function with the same name, or, if a match is found,
+ * that the previously seen signature does not have an associated definition.
+ */
+ if (state->es_shader || f->has_user_signature()) {
+ sig = f->exact_matching_signature(state, &hir_parameters);
+ if (sig != NULL) {
+ const char *badvar = sig->qualifiers_match(&hir_parameters);
+ if (badvar != NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(&loc, state, "function `%s' parameter `%s' "
+ "qualifiers don't match prototype", name, badvar);
+ }
+
+ if (sig->return_type != return_type) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(&loc, state, "function `%s' return type doesn't "
+ "match prototype", name);
+ }
+
+ if (sig->is_defined) {
+ if (is_definition) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+ } else {
+ /* We just encountered a prototype that exactly matches a
+ * function that's already been defined. This is redundant,
+ * and we should ignore it.
+ */
+ return NULL;
+ }
+ }
+ }
+ }
+
+ /* Verify the return type of main() */
+ if (strcmp(name, "main") == 0) {
+ if (! return_type->is_void()) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "main() must return void");
+ }
+
+ if (!hir_parameters.is_empty()) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "main() must not take any parameters");
+ }
+ }
+
+ /* Finish storing the information about this new function in its signature.
+ */
+ if (sig == NULL) {
+ sig = new(ctx) ir_function_signature(return_type);
+ f->add_signature(sig);
+ }
+
+ sig->replace_parameters(&hir_parameters);
+ signature = sig;
+
+ if (this->return_type->qualifier.flags.q.subroutine_def) {
+ int idx;
+
+ if (this->return_type->qualifier.flags.q.explicit_index) {
+ unsigned qual_index;
+ if (process_qualifier_constant(state, &loc, "index",
+ this->return_type->qualifier.index,
+ &qual_index)) {
+ if (!state->has_explicit_uniform_location()) {
+ _mesa_glsl_error(&loc, state, "subroutine index requires "
+ "GL_ARB_explicit_uniform_location or "
+ "GLSL 4.30");
+ } else if (qual_index >= MAX_SUBROUTINES) {
+ _mesa_glsl_error(&loc, state,
+ "invalid subroutine index (%d) index must "
+ "be a number between 0 and "
+ "GL_MAX_SUBROUTINES - 1 (%d)", qual_index,
+ MAX_SUBROUTINES - 1);
+ } else {
+ f->subroutine_index = qual_index;
+ }
+ }
+ }
+
+ f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
+ f->subroutine_types = ralloc_array(state, const struct glsl_type *,
+ f->num_subroutine_types);
+ idx = 0;
+ foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) {
+ const struct glsl_type *type;
+ /* the subroutine type must be already declared */
+ type = state->symbols->get_type(decl->identifier);
+ if (!type) {
+ _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier);
+ }
+ f->subroutine_types[idx++] = type;
+ }
+ state->subroutines = (ir_function **)reralloc(state, state->subroutines,
+ ir_function *,
+ state->num_subroutines + 1);
+ state->subroutines[state->num_subroutines] = f;
+ state->num_subroutines++;
+
+ }
+
+ if (this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) {
+ _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier);
+ return NULL;
+ }
+ state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types,
+ ir_function *,
+ state->num_subroutine_types + 1);
+ state->subroutine_types[state->num_subroutine_types] = f;
+ state->num_subroutine_types++;
+
+ f->is_subroutine = true;
+ }
+
+ /* Function declarations (prototypes) do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_function_definition::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ prototype->is_definition = true;
+ prototype->hir(instructions, state);
+
+ ir_function_signature *signature = prototype->signature;
+ if (signature == NULL)
+ return NULL;
+
+ assert(state->current_function == NULL);
+ state->current_function = signature;
+ state->found_return = false;
+
+ /* Duplicate parameters declared in the prototype as concrete variables.
+ * Add these to the symbol table.
+ */
+ state->symbols->push_scope();
+ foreach_in_list(ir_variable, var, &signature->parameters) {
+ assert(var->as_variable() != NULL);
+
+ /* The only way a parameter would "exist" is if two parameters have
+ * the same name.
+ */
+ if (state->symbols->name_declared_this_scope(var->name)) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "parameter `%s' redeclared", var->name);
+ } else {
+ state->symbols->add_variable(var);
+ }
+ }
+
+ /* Convert the body of the function to HIR. */
+ this->body->hir(&signature->body, state);
+ signature->is_defined = true;
+
+ state->symbols->pop_scope();
+
+ assert(state->current_function == signature);
+ state->current_function = NULL;
+
+ if (!signature->return_type->is_void() && !state->found_return) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state, "function `%s' has non-void return type "
+ "%s, but no return statement",
+ signature->function_name(),
+ signature->return_type->name);
+ }
+
+ /* Function definitions do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_jump_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ switch (mode) {
+ case ast_return: {
+ ir_return *inst;
+ assert(state->current_function);
+
+ if (opt_return_value) {
+ ir_rvalue *ret = opt_return_value->hir(instructions, state);
+
+ /* The value of the return type can be NULL if the shader says
+ * 'return foo();' and foo() is a function that returns void.
+ *
+ * NOTE: The GLSL spec doesn't say that this is an error. The type
+ * of the return value is void. If the return type of the function is
+ * also void, then this should compile without error. Seriously.
+ */
+ const glsl_type *const ret_type =
+ (ret == NULL) ? glsl_type::void_type : ret->type;
+
+ /* Implicit conversions are not allowed for return values prior to
+ * ARB_shading_language_420pack.
+ */
+ if (state->current_function->return_type != ret_type) {
+ YYLTYPE loc = this->get_location();
+
+ if (state->has_420pack()) {
+ if (!apply_implicit_conversion(state->current_function->return_type,
+ ret, state)) {
+ _mesa_glsl_error(& loc, state,
+ "could not implicitly convert return value "
+ "to %s, in function `%s'",
+ state->current_function->return_type->name,
+ state->current_function->function_name());
+ }
+ } else {
+ _mesa_glsl_error(& loc, state,
+ "`return' with wrong type %s, in function `%s' "
+ "returning %s",
+ ret_type->name,
+ state->current_function->function_name(),
+ state->current_function->return_type->name);
+ }
+ } else if (state->current_function->return_type->base_type ==
+ GLSL_TYPE_VOID) {
+ YYLTYPE loc = this->get_location();
+
+ /* The ARB_shading_language_420pack, GLSL ES 3.0, and GLSL 4.20
+ * specs add a clarification:
+ *
+ * "A void function can only use return without a return argument, even if
+ * the return argument has void type. Return statements only accept values:
+ *
+ * void func1() { }
+ * void func2() { return func1(); } // illegal return statement"
+ */
+ _mesa_glsl_error(& loc, state,
+ "void functions can only use `return' without a "
+ "return argument");
+ }
+
+ inst = new(ctx) ir_return(ret);
+ } else {
+ if (state->current_function->return_type->base_type !=
+ GLSL_TYPE_VOID) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "`return' with no value, in function %s returning "
+ "non-void",
+ state->current_function->function_name());
+ }
+ inst = new(ctx) ir_return;
+ }
+
+ state->found_return = true;
+ instructions->push_tail(inst);
+ break;
+ }
+
+ case ast_discard:
+ if (state->stage != MESA_SHADER_FRAGMENT) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "`discard' may only appear in a fragment shader");
+ }
+ instructions->push_tail(new(ctx) ir_discard);
+ break;
+
+ case ast_break:
+ case ast_continue:
+ if (mode == ast_continue &&
+ state->loop_nesting_ast == NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "continue may only appear in a loop");
+ } else if (mode == ast_break &&
+ state->loop_nesting_ast == NULL &&
+ state->switch_state.switch_nesting_ast == NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "break may only appear in a loop or a switch");
+ } else {
+ /* For a loop, inline the for loop expression again, since we don't
+ * know where near the end of the loop body the normal copy of it is
+ * going to be placed. Same goes for the condition for a do-while
+ * loop.
+ */
+ if (state->loop_nesting_ast != NULL &&
+ mode == ast_continue && !state->switch_state.is_switch_innermost) {
+ if (state->loop_nesting_ast->rest_expression) {
+ state->loop_nesting_ast->rest_expression->hir(instructions,
+ state);
+ }
+ if (state->loop_nesting_ast->mode ==
+ ast_iteration_statement::ast_do_while) {
+ state->loop_nesting_ast->condition_to_hir(instructions, state);
+ }
+ }
+
+ if (state->switch_state.is_switch_innermost &&
+ mode == ast_continue) {
+ /* Set 'continue_inside' to true. */
+ ir_rvalue *const true_val = new (ctx) ir_constant(true);
+ ir_dereference_variable *deref_continue_inside_var =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var,
+ true_val));
+
+ /* Break out from the switch, continue for the loop will
+ * be called right after switch. */
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ instructions->push_tail(jump);
+
+ } else if (state->switch_state.is_switch_innermost &&
+ mode == ast_break) {
+ /* Force break out of switch by inserting a break. */
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ instructions->push_tail(jump);
+ } else {
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump((mode == ast_break)
+ ? ir_loop_jump::jump_break
+ : ir_loop_jump::jump_continue);
+ instructions->push_tail(jump);
+ }
+ }
+
+ break;
+ }
+
+ /* Jump instructions do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_selection_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ ir_rvalue *const condition = this->condition->hir(instructions, state);
+
+ /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Any expression whose type evaluates to a Boolean can be used as the
+ * conditional expression bool-expression. Vector types are not accepted
+ * as the expression to if."
+ *
+ * The checks are separated so that higher quality diagnostics can be
+ * generated for cases where both rules are violated.
+ */
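+ /* Illustrative sketch: "if (bvec2(true, false)) ..." and "if (1) ..." are
+ * both rejected below; the condition must be a single bool.
+ */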
+ if (!condition->type->is_boolean() || !condition->type->is_scalar()) {
+ YYLTYPE loc = this->condition->get_location();
+
+ _mesa_glsl_error(& loc, state, "if-statement condition must be scalar "
+ "boolean");
+ }
+
+ ir_if *const stmt = new(ctx) ir_if(condition);
+
+ if (then_statement != NULL) {
+ state->symbols->push_scope();
+ then_statement->hir(& stmt->then_instructions, state);
+ state->symbols->pop_scope();
+ }
+
+ if (else_statement != NULL) {
+ state->symbols->push_scope();
+ else_statement->hir(& stmt->else_instructions, state);
+ state->symbols->pop_scope();
+ }
+
+ instructions->push_tail(stmt);
+
+ /* if-statements do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_switch_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ ir_rvalue *const test_expression =
+ this->test_expression->hir(instructions, state);
+
+ /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The type of init-expression in a switch statement must be a
+ * scalar integer."
+ */
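+ /* Illustrative sketch: "switch (1.5)" and "switch (uvec2(0u))" are both
+ * rejected below; "switch (i)" with a scalar int or uint is accepted.
+ */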
+ if (!test_expression->type->is_scalar() ||
+ !test_expression->type->is_integer()) {
+ YYLTYPE loc = this->test_expression->get_location();
+
+ _mesa_glsl_error(& loc,
+ state,
+ "switch-statement expression must be scalar "
+ "integer");
+ }
+
+ /* Track the switch-statement nesting in a stack-like manner.
+ */
+ struct glsl_switch_state saved = state->switch_state;
+
+ state->switch_state.is_switch_innermost = true;
+ state->switch_state.switch_nesting_ast = this;
+ state->switch_state.labels_ht = hash_table_ctor(0, hash_table_pointer_hash,
+ hash_table_pointer_compare);
+ state->switch_state.previous_default = NULL;
+
+ /* Initialize is_fallthru state to false.
+ */
+ ir_rvalue *const is_fallthru_val = new (ctx) ir_constant(false);
+ state->switch_state.is_fallthru_var =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "switch_is_fallthru_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.is_fallthru_var);
+
+ ir_dereference_variable *deref_is_fallthru_var =
+ new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
+ instructions->push_tail(new(ctx) ir_assignment(deref_is_fallthru_var,
+ is_fallthru_val));
+
+ /* Initialize continue_inside state to false.
+ */
+ state->switch_state.continue_inside =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "continue_inside_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.continue_inside);
+
+ ir_rvalue *const false_val = new (ctx) ir_constant(false);
+ ir_dereference_variable *deref_continue_inside_var =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var,
+ false_val));
+
+ state->switch_state.run_default =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "run_default_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.run_default);
+
+ /* Loop around the switch is used for flow control. */
+ ir_loop * loop = new(ctx) ir_loop();
+ instructions->push_tail(loop);
+
+ /* Cache test expression.
+ */
+ test_to_hir(&loop->body_instructions, state);
+
+ /* Emit code for body of switch stmt.
+ */
+ body->hir(&loop->body_instructions, state);
+
+ /* Insert a break at the end to exit loop. */
+ ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ loop->body_instructions.push_tail(jump);
+
+ /* If we are inside loop, check if continue got called inside switch. */
+ if (state->loop_nesting_ast != NULL) {
+ ir_dereference_variable *deref_continue_inside =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ ir_if *irif = new(ctx) ir_if(deref_continue_inside);
+ ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_continue);
+
+ if (state->loop_nesting_ast != NULL) {
+ if (state->loop_nesting_ast->rest_expression) {
+ state->loop_nesting_ast->rest_expression->hir(&irif->then_instructions,
+ state);
+ }
+ if (state->loop_nesting_ast->mode ==
+ ast_iteration_statement::ast_do_while) {
+ state->loop_nesting_ast->condition_to_hir(&irif->then_instructions, state);
+ }
+ }
+ irif->then_instructions.push_tail(jump);
+ instructions->push_tail(irif);
+ }
+
+ hash_table_dtor(state->switch_state.labels_ht);
+
+ state->switch_state = saved;
+
+ /* Switch statements do not have r-values. */
+ return NULL;
+ }
+
+
+ void
+ ast_switch_statement::test_to_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ /* Cache value of test expression. */
+ ir_rvalue *const test_val =
+ test_expression->hir(instructions,
+ state);
+
+ state->switch_state.test_var = new(ctx) ir_variable(test_val->type,
+ "switch_test_tmp",
+ ir_var_temporary);
+ ir_dereference_variable *deref_test_var =
+ new(ctx) ir_dereference_variable(state->switch_state.test_var);
+
+ instructions->push_tail(state->switch_state.test_var);
+ instructions->push_tail(new(ctx) ir_assignment(deref_test_var, test_val));
+ }
+
+
+ ir_rvalue *
+ ast_switch_body::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ if (stmts != NULL)
+ stmts->hir(instructions, state);
+
+ /* Switch bodies do not have r-values. */
+ return NULL;
+ }
+
+ ir_rvalue *
+ ast_case_statement_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ exec_list default_case, after_default, tmp;
+
+ foreach_list_typed (ast_case_statement, case_stmt, link, & this->cases) {
+ case_stmt->hir(&tmp, state);
+
+ /* Default case. */
+ if (state->switch_state.previous_default && default_case.is_empty()) {
+ default_case.append_list(&tmp);
+ continue;
+ }
+
+ /* If default case found, append 'after_default' list. */
+ if (!default_case.is_empty())
+ after_default.append_list(&tmp);
+ else
+ instructions->append_list(&tmp);
+ }
+
+ /* Handle the default case. This is done here because default might not be
+ * the last case. We need to add checks against following cases first to see
+ * if default should be chosen or not.
+ */
+ if (!default_case.is_empty()) {
+
+ ir_rvalue *const true_val = new (state) ir_constant(true);
+ ir_dereference_variable *deref_run_default_var =
+ new(state) ir_dereference_variable(state->switch_state.run_default);
+
+ /* Choose to run default case initially, following conditional
+ * assignments might change this.
+ */
+ ir_assignment *const init_var =
+ new(state) ir_assignment(deref_run_default_var, true_val);
+ instructions->push_tail(init_var);
+
+ /* Default case was the last one, no checks required. */
+ if (after_default.is_empty()) {
+ instructions->append_list(&default_case);
+ return NULL;
+ }
+
+ foreach_in_list(ir_instruction, ir, &after_default) {
+ ir_assignment *assign = ir->as_assignment();
+
+ if (!assign)
+ continue;
+
+ /* Clone the check between case label and init expression. */
+ ir_expression *exp = (ir_expression*) assign->condition;
+ ir_expression *clone = exp->clone(state, NULL);
+
+ ir_dereference_variable *deref_var =
+ new(state) ir_dereference_variable(state->switch_state.run_default);
+ ir_rvalue *const false_val = new (state) ir_constant(false);
+
+ ir_assignment *const set_false =
+ new(state) ir_assignment(deref_var, false_val, clone);
+
+ instructions->push_tail(set_false);
+ }
+
+ /* Append default case and all cases after it. */
+ instructions->append_list(&default_case);
+ instructions->append_list(&after_default);
+ }
+
+ /* Case statements do not have r-values. */
+ return NULL;
+ }
+
+ ir_rvalue *
+ ast_case_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ labels->hir(instructions, state);
+
+ /* Guard case statements depending on fallthru state. */
+ ir_dereference_variable *const deref_fallthru_guard =
+ new(state) ir_dereference_variable(state->switch_state.is_fallthru_var);
+ ir_if *const test_fallthru = new(state) ir_if(deref_fallthru_guard);
+
+ foreach_list_typed (ast_node, stmt, link, & this->stmts)
+ stmt->hir(& test_fallthru->then_instructions, state);
+
+ instructions->push_tail(test_fallthru);
+
+ /* Case statements do not have r-values. */
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_case_label_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ foreach_list_typed (ast_case_label, label, link, & this->labels)
+ label->hir(instructions, state);
+
+ /* Case labels do not have r-values. */
+ return NULL;
+ }
+
+ ir_rvalue *
+ ast_case_label::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ ir_dereference_variable *deref_fallthru_var =
+ new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
+
+ ir_rvalue *const true_val = new(ctx) ir_constant(true);
+
+ /* If not default case, ... */
+ if (this->test_value != NULL) {
+ /* Conditionally set fallthru state based on
+ * comparison of cached test expression value to case label.
+ */
+ ir_rvalue *const label_rval = this->test_value->hir(instructions, state);
+ ir_constant *label_const = label_rval->constant_expression_value();
+
+ if (!label_const) {
+ YYLTYPE loc = this->test_value->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "switch statement case label must be a "
+ "constant expression");
+
+ /* Stuff a dummy value in to allow processing to continue. */
+ label_const = new(ctx) ir_constant(0);
+ } else {
+ ast_expression *previous_label = (ast_expression *)
+ hash_table_find(state->switch_state.labels_ht,
+ (void *)(uintptr_t)label_const->value.u[0]);
+
+ if (previous_label) {
+ YYLTYPE loc = this->test_value->get_location();
+ _mesa_glsl_error(& loc, state, "duplicate case value");
+
+ loc = previous_label->get_location();
+ _mesa_glsl_error(& loc, state, "this is the previous case label");
+ } else {
+ hash_table_insert(state->switch_state.labels_ht,
+ this->test_value,
+ (void *)(uintptr_t)label_const->value.u[0]);
+ }
+ }
+
+ ir_dereference_variable *deref_test_var =
+ new(ctx) ir_dereference_variable(state->switch_state.test_var);
+
+ ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+ label_const,
+ deref_test_var);
+
+ /*
+ * From GLSL 4.40 specification section 6.2 ("Selection"):
+ *
+ * "The type of the init-expression value in a switch statement must
+ * be a scalar int or uint. The type of the constant-expression value
+ * in a case label also must be a scalar int or uint. When any pair
+ * of these values is tested for "equal value" and the types do not
+ * match, an implicit conversion will be done to convert the int to a
+ * uint (see section 4.1.10 “Implicit Conversions”) before the compare
+ * is done."
+ */
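+ /* Illustrative sketch: for "switch (u)" with a uint expression, a label
+ * "case 1:" has its int constant converted to uint before the comparison.
+ */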
+ if (label_const->type != state->switch_state.test_var->type) {
+ YYLTYPE loc = this->test_value->get_location();
+
+ const glsl_type *type_a = label_const->type;
+ const glsl_type *type_b = state->switch_state.test_var->type;
+
+ /* Check if int->uint implicit conversion is supported. */
+ bool integer_conversion_supported =
+ glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type,
+ state);
+
+ if ((!type_a->is_integer() || !type_b->is_integer()) ||
+ !integer_conversion_supported) {
+ _mesa_glsl_error(&loc, state, "type mismatch with switch "
+ "init-expression and case label (%s != %s)",
+ type_a->name, type_b->name);
+ } else {
+ /* Conversion of the case label. */
+ if (type_a->base_type == GLSL_TYPE_INT) {
+ if (!apply_implicit_conversion(glsl_type::uint_type,
+ test_cond->operands[0], state))
+ _mesa_glsl_error(&loc, state, "implicit type conversion error");
+ } else {
+ /* Conversion of the init-expression value. */
+ if (!apply_implicit_conversion(glsl_type::uint_type,
+ test_cond->operands[1], state))
+ _mesa_glsl_error(&loc, state, "implicit type conversion error");
+ }
+ }
+ }
+
+ ir_assignment *set_fallthru_on_test =
+ new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
+
+ instructions->push_tail(set_fallthru_on_test);
+ } else { /* default case */
+ if (state->switch_state.previous_default) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state,
+ "multiple default labels in one switch");
+
+ loc = state->switch_state.previous_default->get_location();
+ _mesa_glsl_error(& loc, state, "this is the first default label");
+ }
+ state->switch_state.previous_default = this;
+
+ /* Set fallthru condition on 'run_default' bool. */
+ ir_dereference_variable *deref_run_default =
+ new(ctx) ir_dereference_variable(state->switch_state.run_default);
+ ir_rvalue *const cond_true = new(ctx) ir_constant(true);
+ ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+ cond_true,
+ deref_run_default);
+
+ /* Set fallthru state. */
+ ir_assignment *set_fallthru =
+ new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
+
+ instructions->push_tail(set_fallthru);
+ }
+
+ /* Case statements do not have r-values. */
+ return NULL;
+ }
+
+ void
+ ast_iteration_statement::condition_to_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ if (condition != NULL) {
+ ir_rvalue *const cond =
+ condition->hir(instructions, state);
+
+ if ((cond == NULL)
+ || !cond->type->is_boolean() || !cond->type->is_scalar()) {
+ YYLTYPE loc = condition->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "loop condition must be scalar boolean");
+ } else {
+ /* As the first code in the loop body, generate a block that looks
+ * like 'if (!condition) break;' as the loop termination condition.
+ */
+ ir_rvalue *const not_cond =
+ new(ctx) ir_expression(ir_unop_logic_not, cond);
+
+ ir_if *const if_stmt = new(ctx) ir_if(not_cond);
+
+ ir_jump *const break_stmt =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+
+ if_stmt->then_instructions.push_tail(break_stmt);
+ instructions->push_tail(if_stmt);
+ }
+ }
+ }
+
+
+ ir_rvalue *
+ ast_iteration_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ void *ctx = state;
+
+ /* For-loops and while-loops start a new scope, but do-while loops do not.
+ */
+ if (mode != ast_do_while)
+ state->symbols->push_scope();
+
+ if (init_statement != NULL)
+ init_statement->hir(instructions, state);
+
+ ir_loop *const stmt = new(ctx) ir_loop();
+ instructions->push_tail(stmt);
+
+ /* Track the current loop nesting. */
+ ast_iteration_statement *nesting_ast = state->loop_nesting_ast;
+
+ state->loop_nesting_ast = this;
+
+ /* Likewise, indicate that following code is closest to a loop,
+ * NOT closest to a switch.
+ */
+ bool saved_is_switch_innermost = state->switch_state.is_switch_innermost;
+ state->switch_state.is_switch_innermost = false;
+
+ if (mode != ast_do_while)
+ condition_to_hir(&stmt->body_instructions, state);
+
+ if (body != NULL)
+ body->hir(& stmt->body_instructions, state);
+
+ if (rest_expression != NULL)
+ rest_expression->hir(& stmt->body_instructions, state);
+
+ if (mode == ast_do_while)
+ condition_to_hir(&stmt->body_instructions, state);
+
+ if (mode != ast_do_while)
+ state->symbols->pop_scope();
+
+ /* Restore previous nesting before returning. */
+ state->loop_nesting_ast = nesting_ast;
+ state->switch_state.is_switch_innermost = saved_is_switch_innermost;
+
+ /* Loops do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ /**
+ * Determine if the given type is valid for establishing a default precision
+ * qualifier.
+ *
+ * From GLSL ES 3.00 section 4.5.4 ("Default Precision Qualifiers"):
+ *
+ * "The precision statement
+ *
+ * precision precision-qualifier type;
+ *
+ * can be used to establish a default precision qualifier. The type field
+ * can be either int or float or any of the sampler types, and the
+ * precision-qualifier can be lowp, mediump, or highp."
+ *
+ * GLSL ES 1.00 has similar language. GLSL 1.30 doesn't allow precision
+ * qualifiers on sampler types, but this seems like an oversight (since the
+ * intention of including these in GLSL 1.30 is to allow compatibility with ES
+ * shaders). So we allow int, float, and all sampler types regardless of GLSL
+ * version.
+ */
+ static bool
+ is_valid_default_precision_type(const struct glsl_type *const type)
+ {
+ if (type == NULL)
+ return false;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ /* "int" and "float" are valid, but vectors and matrices are not. */
+ return type->vector_elements == 1 && type->matrix_columns == 1;
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+
+ ir_rvalue *
+ ast_type_specifier::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ if (this->default_precision == ast_precision_none && this->structure == NULL)
+ return NULL;
+
+ YYLTYPE loc = this->get_location();
+
+ /* If this is a precision statement, check that the type to which it is
+ * applied is either float or int.
+ *
+ * From section 4.5.3 of the GLSL 1.30 spec:
+ * "The precision statement
+ * precision precision-qualifier type;
+ * can be used to establish a default precision qualifier. The type
+ * field can be either int or float [...]. Any other types or
+ * qualifiers will result in an error.
+ */
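+ /* Illustrative sketch: "precision mediump float;" is accepted below,
+ * while "precision highp vec4;" or a precision statement naming a
+ * structure type is rejected.
+ */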
+ if (this->default_precision != ast_precision_none) {
+ if (!state->check_precision_qualifiers_allowed(&loc))
+ return NULL;
+
+ if (this->structure != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers do not apply to structures");
+ return NULL;
+ }
+
+ if (this->array_specifier != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "default precision statements do not apply to "
+ "arrays");
+ return NULL;
+ }
+
+ const struct glsl_type *const type =
+ state->symbols->get_type(this->type_name);
+ if (!is_valid_default_precision_type(type)) {
+ _mesa_glsl_error(&loc, state,
+ "default precision statements apply only to "
+ "float, int, and opaque types");
+ return NULL;
+ }
+
+ if (state->es_shader) {
+ /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00
+ * spec says:
+ *
+ * "Non-precision qualified declarations will use the precision
+ * qualifier specified in the most recent precision statement
+ * that is still in scope. The precision statement has the same
+ * scoping rules as variable declarations. If it is declared
+ * inside a compound statement, its effect stops at the end of
+ * the innermost statement it was declared in. Precision
+ * statements in nested scopes override precision statements in
+ * outer scopes. Multiple precision statements for the same basic
+ * type can appear inside the same scope, with later statements
+ * overriding earlier statements within that scope."
+ *
+ * Default precision specifications follow the same scope rules as
+ * variables. So, we can track the state of the default precision
+ * qualifiers in the symbol table, and the rules will just work. This
+ * is a slight abuse of the symbol table, but it has the semantics
+ * that we want.
+ */
+ state->symbols->add_default_precision_qualifier(this->type_name,
+ this->default_precision);
+ }
+
+ /* FINISHME: Translate precision statements into IR. */
+ return NULL;
+ }
+
+ /* _mesa_ast_set_aggregate_type() sets the <structure> field so that
+ * process_record_constructor() can do type-checking on C-style initializer
+ * expressions of structs, but ast_struct_specifier should only be translated
+ * to HIR if it is declaring the type of a structure.
+ *
+ * The ->is_declaration field is false for initializers of variables
+ * declared separately from the struct's type definition.
+ *
+ * struct S { ... }; (is_declaration = true)
+ * struct T { ... } t = { ... }; (is_declaration = true)
+ * S s = { ... }; (is_declaration = false)
+ */
+ if (this->structure != NULL && this->structure->is_declaration)
+ return this->structure->hir(instructions, state);
+
+ return NULL;
+ }
+
+
+ /**
+ * Process a structure or interface block tree into an array of structure fields
+ *
+ * After parsing, where there are some syntax differences, structures and
+ * interface blocks are almost identical. They are similar enough that the
+ * AST for each can be processed the same way into a set of
+ * \c glsl_struct_field to describe the members.
+ *
+ * If we're processing an interface block, var_mode should be the type of the
+ * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
+ * ir_var_shader_storage). If we're processing a structure, var_mode should be
+ * ir_var_auto.
+ *
+ * \return
+ * The number of fields processed. A pointer to the array structure fields is
+ * stored in \c *fields_ret.
+ */
+ static unsigned
+ ast_process_struct_or_iface_block_members(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ exec_list *declarations,
+ glsl_struct_field **fields_ret,
+ bool is_interface,
+ enum glsl_matrix_layout matrix_layout,
+ bool allow_reserved_names,
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout,
+ unsigned block_stream,
+ unsigned expl_location)
+ {
+ unsigned decl_count = 0;
+
+ /* Make an initial pass over the list of fields to determine how
+ * many there are. Each element in this list is an ast_declarator_list.
+ * This means that we actually need to count the number of elements in the
+ * 'declarations' list in each of the elements.
+ */
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
+ decl_count += decl_list->declarations.length();
+ }
+
+ /* Allocate storage for the fields and process the field
+ * declarations. As the declarations are processed, try to also convert
+ * the types to HIR. This ensures that structure definitions embedded in
+ * other structure definitions or in interface blocks are processed.
+ */
+ glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field,
+ decl_count);
+
+ bool first_member = true;
+ bool first_member_has_explicit_location;
+
+ unsigned i = 0;
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
+ const char *type_name;
+ YYLTYPE loc = decl_list->get_location();
+
+ decl_list->type->specifier->hir(instructions, state);
+
+ /* Section 10.9 of the GLSL ES 1.00 specification states that
+ * embedded structure definitions have been removed from the language.
+ */
+ if (state->es_shader && decl_list->type->specifier->structure != NULL) {
+ _mesa_glsl_error(&loc, state, "embedded structure definitions are "
+ "not allowed in GLSL ES 1.00");
+ }
+
+ const glsl_type *decl_type =
+ decl_list->type->glsl_type(& type_name, state);
+
+ const struct ast_type_qualifier *const qual =
+ &decl_list->type->qualifier;
+
+ /* From section 4.3.9 of the GLSL 4.40 spec:
+ *
+ * "[In interface blocks] opaque types are not allowed."
+ *
+ * It should be impossible for decl_type to be NULL here. Cases that
+ * might naturally lead to decl_type being NULL, especially for the
+ * is_interface case, will have resulted in compilation having
+ * already halted due to a syntax error.
+ */
+ assert(decl_type);
+
+ if (is_interface && decl_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/buffer in non-default interface block contains "
+ "opaque variable");
+ }
+
+ if (decl_type->contains_atomic()) {
+ /* From section 4.1.7.3 of the GLSL 4.40 spec:
+ *
+ * "Members of structures cannot be declared as atomic counter
+ * types."
+ */
+ _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+ "shader storage block or uniform block");
+ }
+
+ if (decl_type->contains_image()) {
+ /* FINISHME: Same problem as with atomic counters.
+ * FINISHME: Request clarification from Khronos and add
+ * FINISHME: spec quotation here.
+ */
+ _mesa_glsl_error(&loc, state,
+ "image in structure, shader storage block or "
+ "uniform block");
+ }
+
+ if (qual->flags.q.explicit_binding) {
+ _mesa_glsl_error(&loc, state,
+ "binding layout qualifier cannot be applied "
+ "to struct or interface block members");
+ }
+
+ if (is_interface) {
+ if (!first_member) {
+ if (!layout->flags.q.explicit_location &&
+ ((first_member_has_explicit_location &&
+ !qual->flags.q.explicit_location) ||
+ (!first_member_has_explicit_location &&
+ qual->flags.q.explicit_location))) {
+ _mesa_glsl_error(&loc, state,
+ "when block-level location layout qualifier "
+ "is not supplied either all members must "
+ "have a location layout qualifier or all "
+ "members must not have a location layout "
+ "qualifier");
+ }
+ } else {
+ first_member = false;
+ first_member_has_explicit_location =
+ qual->flags.q.explicit_location;
+ }
+ }
+
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.constant) {
+ _mesa_glsl_error(&loc, state,
+ "const storage qualifier cannot be applied "
+ "to struct or interface block members");
+ }
+
+ /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
+ *
+ * "A block member may be declared with a stream identifier, but
+ * the specified stream must match the stream associated with the
+ * containing block."
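+ *
+ * For example (illustrative), with a block declared `layout(stream = 1) out B {...}`,
+ * a member qualified `layout(stream = 2)` triggers this error, while
+ * `layout(stream = 1)` on a member is accepted.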
+ */
+ if (qual->flags.q.explicit_stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, &loc, "stream",
+ qual->stream, &qual_stream) &&
+ qual_stream != block_stream) {
+ _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+ "interface block member does not match "
+ "the interface block (%u vs %u)", qual_stream,
+ block_stream);
+ }
+ }
+
+ if (qual->flags.q.uniform && qual->has_interpolation()) {
+ _mesa_glsl_error(&loc, state,
+ "interpolation qualifiers cannot be used "
+ "with uniform interface blocks");
+ }
+
+ if ((qual->flags.q.uniform || !is_interface) &&
+ qual->has_auxiliary_storage()) {
+ _mesa_glsl_error(&loc, state,
+ "auxiliary storage qualifiers cannot be used "
+ "in uniform blocks or structures.");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
+ _mesa_glsl_error(&loc, state,
+ "row_major and column_major can only be "
+ "applied to interface blocks");
+ } else
+ validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
+ }
+
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state, "buffer variable can't be both "
+ "readonly and writeonly.");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link,
+ &decl_list->declarations) {
+ YYLTYPE loc = decl->get_location();
+
+ if (!allow_reserved_names)
+ validate_identifier(decl->identifier, loc, state);
+
+ const struct glsl_type *field_type =
+ process_array_type(&loc, decl_type, decl->array_specifier, state);
+ validate_array_dimensions(field_type, state, &loc);
+ fields[i].type = field_type;
+ fields[i].name = decl->identifier;
+ fields[i].interpolation =
+ interpret_interpolation_qualifier(qual, var_mode, state, &loc);
+ fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
+ fields[i].sample = qual->flags.q.sample ? 1 : 0;
+ fields[i].patch = qual->flags.q.patch ? 1 : 0;
+ fields[i].precision = qual->precision;
+
+ if (qual->flags.q.explicit_location) {
+ unsigned qual_location;
+ if (process_qualifier_constant(state, &loc, "location",
+ qual->location, &qual_location)) {
+ fields[i].location = VARYING_SLOT_VAR0 + qual_location;
+ expl_location = fields[i].location +
+ fields[i].type->count_attribute_slots(false);
+ }
+ } else {
+ if (layout && layout->flags.q.explicit_location) {
+ fields[i].location = expl_location;
+ expl_location += fields[i].type->count_attribute_slots(false);
+ } else {
+ fields[i].location = -1;
+ }
+ }
+
+ /* Propagate row- / column-major information down the fields of the
+ * structure or interface block. Structures need this data because
+ * the structure may contain a structure that contains ... a matrix
+ * that needs the proper layout.
+ */
+ if (field_type->without_array()->is_matrix()
+ || field_type->without_array()->is_record()) {
+ /* If no layout is specified for the field, inherit the layout
+ * from the block.
+ */
+ fields[i].matrix_layout = matrix_layout;
+
+ if (qual->flags.q.row_major)
+ fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ else if (qual->flags.q.column_major)
+ fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
+
+ /* If we're processing an interface block, the matrix layout must
+ * be decided by this point.
+ */
+ assert(!is_interface
+ || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR
+ || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
+ }
+
+ /* Image qualifiers are allowed on buffer variables, which can only
+ * be defined inside shader storage buffer objects
+ */
+ if (layout && var_mode == ir_var_shader_storage) {
+ /* For readonly and writeonly qualifiers the field definition,
+ * if set, overwrites the layout qualifier.
+ */
+ if (qual->flags.q.read_only) {
+ fields[i].image_read_only = true;
+ fields[i].image_write_only = false;
+ } else if (qual->flags.q.write_only) {
+ fields[i].image_read_only = false;
+ fields[i].image_write_only = true;
+ } else {
+ fields[i].image_read_only = layout->flags.q.read_only;
+ fields[i].image_write_only = layout->flags.q.write_only;
+ }
+
+ /* For other qualifiers, we set the flag if either the layout
+ * qualifier or the field qualifier are set
+ */
+ fields[i].image_coherent = qual->flags.q.coherent ||
+ layout->flags.q.coherent;
+ fields[i].image_volatile = qual->flags.q._volatile ||
+ layout->flags.q._volatile;
+ fields[i].image_restrict = qual->flags.q.restrict_flag ||
+ layout->flags.q.restrict_flag;
+ }
+
+ i++;
+ }
+ }
+
+ assert(i == decl_count);
+
+ *fields_ret = fields;
+ return decl_count;
+ }
+
+
+ ir_rvalue *
+ ast_struct_specifier::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* Section 4.1.8 (Structures) of the GLSL 1.10 spec says:
+ *
+ * "Anonymous structures are not supported; so embedded structures must
+ * have a declarator. A name given to an embedded struct is scoped at
+ * the same level as the struct it is embedded in."
+ *
+ * The same section of the GLSL 1.20 spec says:
+ *
+ * "Anonymous structures are not supported. Embedded structures are not
+ * supported.
+ *
+ * struct S { float f; };
+ * struct T {
+ * S; // Error: anonymous structures disallowed
+ * struct { ... }; // Error: embedded structures disallowed
+ * S s; // Okay: nested structures with name are allowed
+ * };"
+ *
+ * The GLSL ES 1.00 and 3.00 specs have similar language and examples. So,
+ * we allow embedded structures in 1.10 only.
+ */
+ if (state->language_version != 110 && state->struct_specifier_depth != 0)
+ _mesa_glsl_error(&loc, state,
+ "embedded structure declarations are not allowed");
+
+ state->struct_specifier_depth++;
+
+ unsigned expl_location = 0;
+ if (layout && layout->flags.q.explicit_location) {
+ if (!process_qualifier_constant(state, &loc, "location",
+ layout->location, &expl_location)) {
+ return NULL;
+ } else {
+ expl_location = VARYING_SLOT_VAR0 + expl_location;
+ }
+ }
+
+ glsl_struct_field *fields;
+ unsigned decl_count =
+ ast_process_struct_or_iface_block_members(instructions,
+ state,
+ &this->declarations,
+ &fields,
+ false,
+ GLSL_MATRIX_LAYOUT_INHERITED,
+ false /* allow_reserved_names */,
+ ir_var_auto,
+ layout,
+ 0, /* for interface only */
+ expl_location);
+
+ validate_identifier(this->name, loc, state);
+
+ const glsl_type *t =
+ glsl_type::get_record_instance(fields, decl_count, this->name);
+
+ if (!state->symbols->add_type(name, t)) {
+ _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
+ } else {
+ const glsl_type **s = reralloc(state, state->user_structures,
+ const glsl_type *,
+ state->num_user_structures + 1);
+ if (s != NULL) {
+ s[state->num_user_structures] = t;
+ state->user_structures = s;
+ state->num_user_structures++;
+ }
+ }
+
+ state->struct_specifier_depth--;
+
+ /* Structure type definitions do not have r-values.
+ */
+ return NULL;
+ }
+
+
+ /**
+ * Visitor class which detects whether a given interface block has been used.
+ */
+ class interface_block_usage_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ interface_block_usage_visitor(ir_variable_mode mode, const glsl_type *block)
+ : mode(mode), block(block), found(false)
+ {
+ }
+
+ virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ {
+ if (ir->var->data.mode == mode && ir->var->get_interface_type() == block) {
+ found = true;
+ return visit_stop;
+ }
+ return visit_continue;
+ }
+
+ bool usage_found() const
+ {
+ return this->found;
+ }
+
+ private:
+ ir_variable_mode mode;
+ const glsl_type *block;
+ bool found;
+ };
+
+ static bool
+ is_unsized_array_last_element(ir_variable *v)
+ {
+ const glsl_type *interface_type = v->get_interface_type();
+ int length = interface_type->length;
+
+ assert(v->type->is_unsized_array());
+
+ /* Check if it is the last element of the interface */
+ if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+ return true;
+ return false;
+ }
+
+ ir_rvalue *
+ ast_interface_block::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* Interface blocks must be declared at global scope */
+ if (state->current_function != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block `%s' must be declared "
+ "at global scope",
+ this->block_name);
+ }
+
+ if (!this->layout.flags.q.buffer &&
+ this->layout.flags.q.std430) {
+ _mesa_glsl_error(&loc, state,
+ "std430 storage block layout qualifier is supported "
+ "only for shader storage blocks");
+ }
+
+ /* The ast_interface_block has a list of ast_declarator_lists. We
+ * need to turn those into ir_variables with an association
+ * with this uniform block.
+ */
+ enum glsl_interface_packing packing;
+ if (this->layout.flags.q.shared) {
+ packing = GLSL_INTERFACE_PACKING_SHARED;
+ } else if (this->layout.flags.q.packed) {
+ packing = GLSL_INTERFACE_PACKING_PACKED;
+ } else if (this->layout.flags.q.std430) {
+ packing = GLSL_INTERFACE_PACKING_STD430;
+ } else {
+ /* The default layout is std140.
+ */
+ packing = GLSL_INTERFACE_PACKING_STD140;
+ }
+
+ ir_variable_mode var_mode;
+ const char *iface_type_name;
+ if (this->layout.flags.q.in) {
+ var_mode = ir_var_shader_in;
+ iface_type_name = "in";
+ } else if (this->layout.flags.q.out) {
+ var_mode = ir_var_shader_out;
+ iface_type_name = "out";
+ } else if (this->layout.flags.q.uniform) {
+ var_mode = ir_var_uniform;
+ iface_type_name = "uniform";
+ } else if (this->layout.flags.q.buffer) {
+ var_mode = ir_var_shader_storage;
+ iface_type_name = "buffer";
+ } else {
+ var_mode = ir_var_auto;
+ iface_type_name = "UNKNOWN";
+ assert(!"interface block layout qualifier not found!");
+ }
+
+ enum glsl_matrix_layout matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED;
+ if (this->layout.flags.q.row_major)
+ matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ else if (this->layout.flags.q.column_major)
+ matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
+
+ bool redeclaring_per_vertex = strcmp(this->block_name, "gl_PerVertex") == 0;
+ exec_list declared_variables;
+ glsl_struct_field *fields;
+
+ /* Treat an interface block as one level of nesting, so that embedded struct
+ * specifiers will be disallowed.
+ */
+ state->struct_specifier_depth++;
+
+ /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+ * that we don't have incompatible qualifiers
+ */
+ if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block sets both readonly and writeonly");
+ }
+
+ unsigned qual_stream;
+ if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
+ &qual_stream) ||
+ !validate_stream_qualifier(&loc, state, qual_stream)) {
+ /* If the stream qualifier is invalid it doesn't make sense to continue
+ * on and try to compare stream layouts on member variables against it
+ * so just return early.
+ */
+ return NULL;
+ }
+
+ unsigned expl_location = 0;
+ if (layout.flags.q.explicit_location) {
+ if (!process_qualifier_constant(state, &loc, "location",
+ layout.location, &expl_location)) {
+ return NULL;
+ } else {
+ expl_location = VARYING_SLOT_VAR0 + expl_location;
+ }
+ }
+
+ unsigned int num_variables =
+ ast_process_struct_or_iface_block_members(&declared_variables,
+ state,
+ &this->declarations,
+ &fields,
+ true,
+ matrix_layout,
+ redeclaring_per_vertex,
+ var_mode,
+ &this->layout,
+ qual_stream,
+ expl_location);
+
+ state->struct_specifier_depth--;
+
+ if (!redeclaring_per_vertex) {
+ validate_identifier(this->block_name, loc, state);
+
+ /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec:
+ *
+ * "Block names have no other use within a shader beyond interface
+ * matching; it is a compile-time error to use a block name at global
+ * scope for anything other than as a block name."
+ */
+ ir_variable *var = state->symbols->get_variable(this->block_name);
+ if (var && !var->type->is_interface()) {
+ _mesa_glsl_error(&loc, state, "Block name `%s' is "
+ "already used in the scope.",
+ this->block_name);
+ }
+ }
+
+ const glsl_type *earlier_per_vertex = NULL;
+ if (redeclaring_per_vertex) {
+ /* Find the previous declaration of gl_PerVertex. If we're redeclaring
+ * the named interface block gl_in, we can find it by looking at the
+ * previous declaration of gl_in. Otherwise we can find it by looking
+ * at the previous declaration of any of the built-in outputs,
+ * e.g. gl_Position.
+ *
+ * Also check that the instance name and array-ness of the redeclaration
+ * are correct.
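+ *
+ * For example (illustrative), a geometry shader redeclares the input block as
+ *
+ *    in gl_PerVertex { vec4 gl_Position; } gl_in[];
+ *
+ * and a vertex shader redeclares the output block with no instance name:
+ *
+ *    out gl_PerVertex { vec4 gl_Position; };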
+ */
+ switch (var_mode) {
+ case ir_var_shader_in:
+ if (ir_variable *earlier_gl_in =
+ state->symbols->get_variable("gl_in")) {
+ earlier_per_vertex = earlier_gl_in->get_interface_type();
+ } else {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex input not allowed "
+ "in the %s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (this->instance_name == NULL ||
+ strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL ||
+ !this->array_specifier->is_single_dimension()) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex input must be redeclared as "
+ "gl_in[]");
+ }
+ break;
+ case ir_var_shader_out:
+ if (ir_variable *earlier_gl_Position =
+ state->symbols->get_variable("gl_Position")) {
+ earlier_per_vertex = earlier_gl_Position->get_interface_type();
+ } else if (ir_variable *earlier_gl_out =
+ state->symbols->get_variable("gl_out")) {
+ earlier_per_vertex = earlier_gl_out->get_interface_type();
+ } else {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex output not "
+ "allowed in the %s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ if (this->instance_name == NULL ||
+ strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex output must be redeclared as "
+ "gl_out[]");
+ }
+ } else {
+ if (this->instance_name != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex output may not be redeclared with "
+ "an instance name");
+ }
+ }
+ break;
+ default:
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex must be declared as an input or an "
+ "output");
+ break;
+ }
+
+ if (earlier_per_vertex == NULL) {
+ /* An error has already been reported. Bail out to avoid null
+ * dereferences later in this function.
+ */
+ return NULL;
+ }
+
+ /* Copy locations from the old gl_PerVertex interface block. */
+ for (unsigned i = 0; i < num_variables; i++) {
+ int j = earlier_per_vertex->field_index(fields[i].name);
+ if (j == -1) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex must be a subset "
+ "of the built-in members of gl_PerVertex");
+ } else {
+ fields[i].location =
+ earlier_per_vertex->fields.structure[j].location;
+ fields[i].interpolation =
+ earlier_per_vertex->fields.structure[j].interpolation;
+ fields[i].centroid =
+ earlier_per_vertex->fields.structure[j].centroid;
+ fields[i].sample =
+ earlier_per_vertex->fields.structure[j].sample;
+ fields[i].patch =
+ earlier_per_vertex->fields.structure[j].patch;
+ fields[i].precision =
+ earlier_per_vertex->fields.structure[j].precision;
+ }
+ }
+
+ /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10
+ * spec:
+ *
+ * If a built-in interface block is redeclared, it must appear in
+ * the shader before any use of any member included in the built-in
+ * declaration, or a compilation error will result.
+ *
+ * This appears to be a clarification to the behaviour established for
+ * gl_PerVertex by GLSL 1.50, therefore we implement this behaviour
+ * regardless of GLSL version.
+ */
+ interface_block_usage_visitor v(var_mode, earlier_per_vertex);
+ v.run(instructions);
+ if (v.usage_found()) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of a built-in interface block must "
+ "appear before any use of any member of the "
+ "interface block");
+ }
+ }
+
+ const glsl_type *block_type =
+ glsl_type::get_interface_instance(fields,
+ num_variables,
+ packing,
+ this->block_name);
+
+ if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' "
+ "already taken in the current scope",
+ this->block_name, iface_type_name);
+ }
+
+ /* Since interface blocks cannot contain statements, it should be
+ * impossible for the block to generate any instructions.
+ */
+ assert(declared_variables.is_empty());
+
+ /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+ *
+ * Geometry shader input variables get the per-vertex values written
+ * out by vertex shader output variables of the same names. Since a
+ * geometry shader operates on a set of vertices, each input varying
+ * variable (or input block, see interface blocks below) needs to be
+ * declared as an array.
+ */
+ if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL &&
+ var_mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays");
+ } else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) &&
+ this->array_specifier == NULL &&
+ var_mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays");
+ } else if (state->stage == MESA_SHADER_TESS_CTRL &&
+ this->array_specifier == NULL &&
+ var_mode == ir_var_shader_out) {
+ _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays");
+ }
+
+
+ /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
+ * says:
+ *
+ * "If an instance name (instance-name) is used, then it puts all the
+ * members inside a scope within its own name space, accessed with the
+ * field selector ( . ) operator (analogously to structures)."
+ */
+ if (this->instance_name) {
+ if (redeclaring_per_vertex) {
+ /* When a built-in in an unnamed interface block is redeclared,
+ * get_variable_being_redeclared() calls
+ * check_builtin_array_max_size() to make sure that built-in array
+ * variables aren't redeclared to illegal sizes. But we're looking
+ * at a redeclaration of a named built-in interface block. So we
+ * have to manually call check_builtin_array_max_size() for all parts
+ * of the interface that are arrays.
+ */
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].type->is_array()) {
+ const unsigned size = fields[i].type->array_size();
+ check_builtin_array_max_size(fields[i].name, size, loc, state);
+ }
+ }
+ } else {
+ validate_identifier(this->instance_name, loc, state);
+ }
+
+ ir_variable *var;
+
+ if (this->array_specifier != NULL) {
+ const glsl_type *block_array_type =
+ process_array_type(&loc, block_type, this->array_specifier, state);
+
+ /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says:
+ *
+ * For uniform blocks declared as an array, each individual array
+ * element corresponds to a separate buffer object backing one
+ * instance of the block. As the array size indicates the number
+ * of buffer objects needed, uniform block array declarations
+ * must specify an array size.
+ *
+ * And a few paragraphs later:
+ *
+ * Geometry shader input blocks must be declared as arrays and
+ * follow the array declaration and linking rules for all
+ * geometry shader inputs. All other input and output block
+ * arrays must specify an array size.
+ *
+ * The same applies to tessellation shaders.
+ *
+ * The upshot of this is that the only circumstance where an
+ * interface array size *doesn't* need to be specified is on a
+ * geometry shader input, tessellation control shader input,
+ * tessellation control shader output, and tessellation evaluation
+ * shader input.
+ */
+ if (block_array_type->is_unsized_array()) {
+ bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY ||
+ state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL;
+ bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL;
+
+ if (this->layout.flags.q.in) {
+ if (!allow_inputs)
+ _mesa_glsl_error(&loc, state,
+ "unsized input block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ } else if (this->layout.flags.q.out) {
+ if (!allow_outputs)
+ _mesa_glsl_error(&loc, state,
+ "unsized output block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ } else {
+ /* by elimination, this is a uniform block array */
+ _mesa_glsl_error(&loc, state,
+ "unsized uniform block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ }
+
+ /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
+ *
+ * * Arrays of arrays of blocks are not allowed
+ */
+ if (state->es_shader && block_array_type->is_array() &&
+ block_array_type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "arrays of arrays of interface blocks are "
+ "not allowed");
+ }
+
+ var = new(state) ir_variable(block_array_type,
+ this->instance_name,
+ var_mode);
+ } else {
+ var = new(state) ir_variable(block_type,
+ this->instance_name,
+ var_mode);
+ }
+
+ var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
+ ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
+
+ if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+ var->data.read_only = true;
+
+ if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in)
+ handle_geometry_shader_input_decl(state, loc, var);
+ else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in)
+ handle_tess_shader_input_decl(state, loc, var);
+ else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
+ handle_tess_ctrl_shader_output_decl(state, loc, var);
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].type->is_unsized_array()) {
+ if (var_mode == ir_var_shader_storage) {
+ if (i != (num_variables - 1)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ }
+ }
+ }
+
+ if (ir_variable *earlier =
+ state->symbols->get_variable(this->instance_name)) {
+ if (!redeclaring_per_vertex) {
+ _mesa_glsl_error(&loc, state, "`%s' redeclared",
+ this->instance_name);
+ }
+ earlier->data.how_declared = ir_var_declared_normally;
+ earlier->type = var->type;
+ earlier->reinit_interface_type(block_type);
+ delete var;
+ } else {
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var, var->type,
+ &this->layout);
+ }
+
+ var->data.stream = qual_stream;
+ if (layout.flags.q.explicit_location) {
+ var->data.location = expl_location;
+ var->data.explicit_location = true;
+ }
+
+ state->symbols->add_variable(var);
+ instructions->push_tail(var);
+ }
+ } else {
+ /* In order to have an array size, the block must also be declared with
+ * an instance name.
+ */
+ assert(this->array_specifier == NULL);
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ ir_variable *var =
+ new(state) ir_variable(fields[i].type,
+ ralloc_strdup(state, fields[i].name),
+ var_mode);
+ var->data.interpolation = fields[i].interpolation;
+ var->data.centroid = fields[i].centroid;
+ var->data.sample = fields[i].sample;
+ var->data.patch = fields[i].patch;
+ var->data.stream = qual_stream;
+ var->data.location = fields[i].location;
+ if (fields[i].location != -1)
+ var->data.explicit_location = true;
+ var->init_interface_type(block_type);
+
+ if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+ var->data.read_only = true;
+
+ /* Precision qualifiers do not have any meaning in Desktop GLSL */
+ if (state->es_shader) {
+ var->data.precision =
+ select_gles_precision(fields[i].precision, fields[i].type,
+ state, &loc);
+ }
+
+ if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) {
+ var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
+ ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
+ } else {
+ var->data.matrix_layout = fields[i].matrix_layout;
+ }
+
+ if (var->data.mode == ir_var_shader_storage) {
+ var->data.image_read_only = fields[i].image_read_only;
+ var->data.image_write_only = fields[i].image_write_only;
+ var->data.image_coherent = fields[i].image_coherent;
+ var->data.image_volatile = fields[i].image_volatile;
+ var->data.image_restrict = fields[i].image_restrict;
+ }
+
+ /* Examine var name here since var may get deleted in the next call */
+ bool var_is_gl_id = is_gl_identifier(var->name);
+
+ if (redeclaring_per_vertex) {
+ ir_variable *earlier =
+ get_variable_being_redeclared(var, loc, state,
+ true /* allow_all_redeclarations */);
+ if (!var_is_gl_id || earlier == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex can only "
+ "include built-in variables");
+ } else if (earlier->data.how_declared == ir_var_declared_normally) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' has already been redeclared",
+ earlier->name);
+ } else {
+ earlier->data.how_declared = ir_var_declared_in_block;
+ earlier->reinit_interface_type(block_type);
+ }
+ continue;
+ }
+
+ if (state->symbols->get_variable(var->name) != NULL)
+ _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
+
+ /* Propagate the "binding" keyword into this UBO/SSBO's fields.
+ * The UBO declaration itself doesn't get an ir_variable unless it
+ * has an instance name. This is ugly.
+ */
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var,
+ var->get_interface_type(), &this->layout);
+ }
+
+ if (var->type->is_unsized_array()) {
+ if (var->is_in_shader_storage_block()) {
+ if (!is_unsized_array_last_element(var)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ var->data.from_ssbo_unsized_array = true;
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ }
+ }
+
+ state->symbols->add_variable(var);
+ instructions->push_tail(var);
+ }
+
+ if (redeclaring_per_vertex && block_type != earlier_per_vertex) {
+ /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 spec:
+ *
+ * It is also a compilation error ... to redeclare a built-in
+ * block and then use a member from that built-in block that was
+ * not included in the redeclaration.
+ *
+ * This appears to be a clarification to the behaviour established
+ * for gl_PerVertex by GLSL 1.50, therefore we implement this
+ * behaviour regardless of GLSL version.
+ *
+ * To prevent the shader from using a member that was not included in
+ * the redeclaration, we disable any ir_variables that are still
+ * associated with the old declaration of gl_PerVertex (since we've
+ * already updated all of the variables contained in the new
+ * gl_PerVertex to point to it).
+ *
+ * As a side effect this will prevent
+ * validate_intrastage_interface_blocks() from getting confused and
+ * thinking there are conflicting definitions of gl_PerVertex in the
+ * shader.
+ */
+ foreach_in_list_safe(ir_instruction, node, instructions) {
+ ir_variable *const var = node->as_variable();
+ if (var != NULL &&
+ var->get_interface_type() == earlier_per_vertex &&
+ var->data.mode == var_mode) {
+ if (var->data.how_declared == ir_var_declared_normally) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of gl_PerVertex cannot "
+ "follow a redeclaration of `%s'",
+ var->name);
+ }
+ state->symbols->disable_variable(var->name);
+ var->remove();
+ }
+ }
+ }
+ }
+
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_tcs_output_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ unsigned num_vertices;
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &num_vertices,
+ false)) {
+ /* return here to stop cascading incorrect error messages */
+ return NULL;
+ }
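+
+ /* For example (illustrative), `layout(vertices = 4) out;` sets num_vertices
+ * to 4; any unsized per-vertex outputs declared earlier are sized to 4 below.
+ */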
+
+ /* If any shader outputs occurred before this declaration and specified an
+ * array size, make sure the size they specified is consistent with the
+ * output vertex count declared here.
+ */
+ if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this tessellation control shader output layout "
+ "specifies %u vertices, but a previous output "
+ "is declared with size %u",
+ num_vertices, state->tcs_output_size);
+ return NULL;
+ }
+
+ state->tcs_output_vertices_specified = true;
+
+ /* If any shader outputs occurred before this declaration and did not
+ * specify an array size, their size is determined now.
+ */
+ foreach_in_list (ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL || var->data.mode != ir_var_shader_out)
+ continue;
+
+ /* Note: Not all tessellation control shader outputs are arrays. */
+ if (!var->type->is_unsized_array() || var->data.patch)
+ continue;
+
+ if (var->data.max_array_access >= num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this tessellation control shader output layout "
+ "specifies %u vertices, but an access to element "
+ "%u of output `%s' already exists", num_vertices,
+ var->data.max_array_access, var->name);
+ } else {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ }
+ }
+
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_gs_input_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* If any geometry input layout declaration preceded this one, make sure it
+ * was consistent with this one.
+ */
+ if (state->gs_input_prim_type_specified &&
+ state->in_qualifier->prim_type != this->prim_type) {
+ _mesa_glsl_error(&loc, state,
+ "geometry shader input layout does not match"
+ " previous declaration");
+ return NULL;
+ }
+
+ /* If any shader inputs occurred before this declaration and specified an
+ * array size, make sure the size they specified is consistent with the
+ * primitive type.
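+ *
+ * For example (illustrative), `layout(triangles) in;` implies 3 vertices per
+ * primitive, so an earlier input declared with size 4 is an error.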
+ */
+ unsigned num_vertices = vertices_per_prim(this->prim_type);
+ if (state->gs_input_size != 0 && state->gs_input_size != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this geometry shader input layout implies %u vertices"
+ " per primitive, but a previous input is declared"
+ " with size %u", num_vertices, state->gs_input_size);
+ return NULL;
+ }
+
+ state->gs_input_prim_type_specified = true;
+
+ /* If any shader inputs occurred before this declaration and did not
+ * specify an array size, their size is determined now.
+ */
+ foreach_in_list(ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL || var->data.mode != ir_var_shader_in)
+ continue;
+
+ /* Note: gl_PrimitiveIDIn has mode ir_var_shader_in, but it's not an
+ * array; skip it.
+ */
+
+ if (var->type->is_unsized_array()) {
+ if (var->data.max_array_access >= num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this geometry shader input layout implies %u"
+ " vertices, but an access to element %u of input"
+ " `%s' already exists", num_vertices,
+ var->data.max_array_access, var->name);
+ } else {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ }
+ }
+ }
+
+ return NULL;
+ }
+
+
+ ir_rvalue *
+ ast_cs_input_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ YYLTYPE loc = this->get_location();
+
+ /* From the ARB_compute_shader specification:
+ *
+ * If the local size of the shader in any dimension is greater
+ * than the maximum size supported by the implementation for that
+ * dimension, a compile-time error results.
+ *
+ * It is not clear from the spec how the error should be reported if
+ * the total size of the work group exceeds
+ * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to
+ * report it at compile time as well.
+ */
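+
+ /* For example (illustrative), `layout(local_size_x = 8, local_size_y = 8) in;`
+ * declares an 8x8x1 work group: the unspecified z dimension defaults to 1
+ * below, and the product 8*8*1 = 64 is checked against
+ * MAX_COMPUTE_WORK_GROUP_INVOCATIONS.
+ */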
+ GLuint64 total_invocations = 1;
+ unsigned qual_local_size[3];
+ for (int i = 0; i < 3; i++) {
+
+ char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c",
+ 'x' + i);
+ /* Infer a local_size of 1 for unspecified dimensions */
+ if (this->local_size[i] == NULL) {
+ qual_local_size[i] = 1;
+ } else if (!this->local_size[i]->
+ process_qualifier_constant(state, local_size_str,
+ &qual_local_size[i], false)) {
+ ralloc_free(local_size_str);
+ return NULL;
+ }
+ ralloc_free(local_size_str);
+
+ if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
+ _mesa_glsl_error(&loc, state,
+ "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE"
+ " (%d)", 'x' + i,
+ state->ctx->Const.MaxComputeWorkGroupSize[i]);
+ break;
+ }
+ total_invocations *= qual_local_size[i];
+ if (total_invocations >
+ state->ctx->Const.MaxComputeWorkGroupInvocations) {
+ _mesa_glsl_error(&loc, state,
+ "product of local_sizes exceeds "
+ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)",
+ state->ctx->Const.MaxComputeWorkGroupInvocations);
+ break;
+ }
+ }
+
+ /* If any compute input layout declaration preceded this one, make sure it
+ * was consistent with this one.
+ */
+ if (state->cs_input_local_size_specified) {
+ for (int i = 0; i < 3; i++) {
+ if (state->cs_input_local_size[i] != qual_local_size[i]) {
+ _mesa_glsl_error(&loc, state,
+ "compute shader input layout does not match"
+ " previous declaration");
+ return NULL;
+ }
+ }
+ }
+
+ state->cs_input_local_size_specified = true;
+ for (int i = 0; i < 3; i++)
+ state->cs_input_local_size[i] = qual_local_size[i];
+
+ /* We may now declare the built-in constant gl_WorkGroupSize (see
+ * builtin_variable_generator::generate_constants() for why we didn't
+ * declare it earlier).
+ */
+ ir_variable *var = new(state->symbols)
+ ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto);
+ var->data.how_declared = ir_var_declared_implicitly;
+ var->data.read_only = true;
+ instructions->push_tail(var);
+ state->symbols->add_variable(var);
+ ir_constant_data data;
+ memset(&data, 0, sizeof(data));
+ for (int i = 0; i < 3; i++)
+ data.u[i] = qual_local_size[i];
+ var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data);
+ var->constant_initializer =
+ new(var) ir_constant(glsl_type::uvec3_type, &data);
+ var->data.has_initializer = true;
+
+ return NULL;
+ }
+
+
+ static void
+ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
+ exec_list *instructions)
+ {
+ bool gl_FragColor_assigned = false;
+ bool gl_FragData_assigned = false;
+ bool gl_FragSecondaryColor_assigned = false;
+ bool gl_FragSecondaryData_assigned = false;
+ bool user_defined_fs_output_assigned = false;
+ ir_variable *user_defined_fs_output = NULL;
+
+ /* It would be nice to have proper location information. */
+ YYLTYPE loc;
+ memset(&loc, 0, sizeof(loc));
+
+ foreach_in_list(ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+
+ if (!var || !var->data.assigned)
+ continue;
+
+ if (strcmp(var->name, "gl_FragColor") == 0)
+ gl_FragColor_assigned = true;
+ else if (strcmp(var->name, "gl_FragData") == 0)
+ gl_FragData_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0)
+ gl_FragSecondaryColor_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0)
+ gl_FragSecondaryData_assigned = true;
+ else if (!is_gl_identifier(var->name)) {
+ if (state->stage == MESA_SHADER_FRAGMENT &&
+ var->data.mode == ir_var_shader_out) {
+ user_defined_fs_output_assigned = true;
+ user_defined_fs_output = var;
+ }
+ }
+ }
+
+ /* From the GLSL 1.30 spec:
+ *
+ * "If a shader statically assigns a value to gl_FragColor, it
+ * may not assign a value to any element of gl_FragData. If a
+ * shader statically writes a value to any element of
+ * gl_FragData, it may not assign a value to
+ * gl_FragColor. That is, a shader may assign values to either
+ * gl_FragColor or gl_FragData, but not both. Multiple shaders
+ * linked together must also consistently write just one of
+ * these variables. Similarly, if user declared output
+ * variables are in use (statically assigned to), then the
+ * built-in variables gl_FragColor and gl_FragData may not be
+ * assigned to. These incorrect usages all generate compile
+ * time errors."
+ */
+ if (gl_FragColor_assigned && gl_FragData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and `gl_FragData'");
+ } else if (gl_FragColor_assigned && user_defined_fs_output_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and `%s'",
+ user_defined_fs_output->name);
+ } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragSecondaryColorEXT' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragData' and"
+ " `gl_FragSecondaryColorEXT'");
+ } else if (gl_FragData_assigned && user_defined_fs_output_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragData' and `%s'",
+ user_defined_fs_output->name);
+ }
+
+ if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) &&
+ !state->EXT_blend_func_extended_enable) {
+ _mesa_glsl_error(&loc, state,
+ "Dual source blending requires EXT_blend_func_extended");
+ }
+ }
+
+
+ static void
+ remove_per_vertex_blocks(exec_list *instructions,
+ _mesa_glsl_parse_state *state, ir_variable_mode mode)
+ {
+ /* Find the gl_PerVertex interface block of the appropriate (in/out) mode,
+ * if it exists in this shader type.
+ */
+ const glsl_type *per_vertex = NULL;
+ switch (mode) {
+ case ir_var_shader_in:
+ if (ir_variable *gl_in = state->symbols->get_variable("gl_in"))
+ per_vertex = gl_in->get_interface_type();
+ break;
+ case ir_var_shader_out:
+ if (ir_variable *gl_Position =
+ state->symbols->get_variable("gl_Position")) {
+ per_vertex = gl_Position->get_interface_type();
+ }
+ break;
+ default:
+ assert(!"Unexpected mode");
+ break;
+ }
+
+ /* If we didn't find a built-in gl_PerVertex interface block, then we don't
+ * need to do anything.
+ */
+ if (per_vertex == NULL)
+ return;
+
+ /* If the interface block is used by the shader, then we don't need to do
+ * anything.
+ */
+ interface_block_usage_visitor v(mode, per_vertex);
+ v.run(instructions);
+ if (v.usage_found())
+ return;
+
+ /* Remove any ir_variable declarations that refer to the interface block
+ * we're removing.
+ */
+ foreach_in_list_safe(ir_instruction, node, instructions) {
+ ir_variable *const var = node->as_variable();
+ if (var != NULL && var->get_interface_type() == per_vertex &&
+ var->data.mode == mode) {
+ state->symbols->disable_variable(var->name);
+ var->remove();
+ }
+ }
+ }
--- /dev/null
+ /*
+ * Copyright © 2008, 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+ #include <stdio.h>
+ #include <stdarg.h>
+ #include <string.h>
+ #include <assert.h>
+
+ #include "main/core.h" /* for struct gl_context */
+ #include "main/context.h"
+ #include "main/shaderobj.h"
+ #include "util/u_atomic.h" /* for p_atomic_cmpxchg */
+ #include "util/ralloc.h"
+ #include "ast.h"
+ #include "glsl_parser_extras.h"
+ #include "glsl_parser.h"
+ #include "ir_optimization.h"
+ #include "loop_analysis.h"
+
+ /**
+ * Format a short human-readable description of the given GLSL version.
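+ *
+ * For example (illustrative), is_es=true with version=300 yields "GLSL ES 3.00".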
+ */
+ const char *
+ glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version)
+ {
+ return ralloc_asprintf(mem_ctx, "GLSL%s %d.%02d", is_es ? " ES" : "",
+ version / 100, version % 100);
+ }
+
+
+ static const unsigned known_desktop_glsl_versions[] =
+ { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 };
+
+
+ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
+ gl_shader_stage stage,
+ void *mem_ctx)
+ : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(),
+ switch_state()
+ {
+ assert(stage < MESA_SHADER_STAGES);
+ this->stage = stage;
+
+ this->scanner = NULL;
+ this->translation_unit.make_empty();
+ this->symbols = new(mem_ctx) glsl_symbol_table;
+
+ this->info_log = ralloc_strdup(mem_ctx, "");
+ this->error = false;
+ this->loop_nesting_ast = NULL;
+
+ this->struct_specifier_depth = 0;
+
+ this->uses_builtin_functions = false;
+
+ /* Set default language version and extensions */
+ this->language_version = 110;
+ this->forced_language_version = ctx->Const.ForceGLSLVersion;
+ this->es_shader = false;
+ this->ARB_texture_rectangle_enable = true;
+
+ /* OpenGL ES 2.0 has different defaults from desktop GL. */
+ if (ctx->API == API_OPENGLES2) {
+ this->language_version = 100;
+ this->es_shader = true;
+ this->ARB_texture_rectangle_enable = false;
+ }
+
+ this->extensions = &ctx->Extensions;
+
++ this->ARB_compute_shader_enable = true;
++
+ this->Const.MaxLights = ctx->Const.MaxLights;
+ this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes;
+ this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits;
+ this->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits;
+ this->Const.MaxVertexAttribs = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs;
+ this->Const.MaxVertexUniformComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents;
+ this->Const.MaxVertexTextureImageUnits = ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits;
+ this->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits;
+ this->Const.MaxTextureImageUnits = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
+ this->Const.MaxFragmentUniformComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents;
+ this->Const.MinProgramTexelOffset = ctx->Const.MinProgramTexelOffset;
+ this->Const.MaxProgramTexelOffset = ctx->Const.MaxProgramTexelOffset;
+
+ this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers;
+
+ this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers;
+
+ /* 1.50 constants */
+ this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
+ this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents;
+ this->Const.MaxGeometryOutputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents;
+ this->Const.MaxFragmentInputComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents;
+ this->Const.MaxGeometryTextureImageUnits = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits;
+ this->Const.MaxGeometryOutputVertices = ctx->Const.MaxGeometryOutputVertices;
+ this->Const.MaxGeometryTotalOutputComponents = ctx->Const.MaxGeometryTotalOutputComponents;
+ this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents;
+
+ this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters;
+ this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters;
+ this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters;
+ this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters;
+ this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
+ this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
+ this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
+ this->Const.MaxVertexAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers;
+ this->Const.MaxTessControlAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers;
+ this->Const.MaxTessEvaluationAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers;
+ this->Const.MaxGeometryAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers;
+ this->Const.MaxFragmentAtomicCounterBuffers =
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ this->Const.MaxCombinedAtomicCounterBuffers =
+ ctx->Const.MaxCombinedAtomicBuffers;
+ this->Const.MaxAtomicCounterBufferSize =
+ ctx->Const.MaxAtomicBufferSize;
+
+ /* Compute shader constants */
+ for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++)
+ this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i];
+ for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++)
+ this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
+
+ this->Const.MaxImageUnits = ctx->Const.MaxImageUnits;
+ this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources;
+ this->Const.MaxImageSamples = ctx->Const.MaxImageSamples;
+ this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms;
+ this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms;
+ this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms;
+ this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms;
+ this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
+ this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
+
+ /* ARB_viewport_array */
+ this->Const.MaxViewports = ctx->Const.MaxViewports;
+
+ /* tessellation shader constants */
+ this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices;
+ this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel;
+ this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents;
+ this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents;
+ this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits;
+ this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents;
+ this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents;
+ this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits;
+ this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents;
+ this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents;
+ this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents;
+ this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents;
+
+ this->current_function = NULL;
+ this->toplevel_ir = NULL;
+ this->found_return = false;
+ this->all_invariant = false;
+ this->user_structures = NULL;
+ this->num_user_structures = 0;
+ this->num_subroutines = 0;
+ this->subroutines = NULL;
+ this->num_subroutine_types = 0;
+ this->subroutine_types = NULL;
+
+ /* supported_versions should be large enough to support the known desktop
+ * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)
+ */
+ STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) ==
+ ARRAY_SIZE(this->supported_versions));
+
+ /* Populate the list of supported GLSL versions */
+ /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or
+ * the OpenGL 3.2 Core context is supported, this logic will need to
+ * change. Older versions of GLSL are no longer supported
+ * outside the compatibility contexts of 3.x.
+ */
+ this->num_supported_versions = 0;
+ if (_mesa_is_desktop_gl(ctx)) {
+ for (unsigned i = 0; i < ARRAY_SIZE(known_desktop_glsl_versions); i++) {
+ if (known_desktop_glsl_versions[i] <= ctx->Const.GLSLVersion) {
+ this->supported_versions[this->num_supported_versions].ver
+ = known_desktop_glsl_versions[i];
+ this->supported_versions[this->num_supported_versions].es = false;
+ this->num_supported_versions++;
+ }
+ }
+ }
+ if (ctx->API == API_OPENGLES2 || ctx->Extensions.ARB_ES2_compatibility) {
+ this->supported_versions[this->num_supported_versions].ver = 100;
+ this->supported_versions[this->num_supported_versions].es = true;
+ this->num_supported_versions++;
+ }
+ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
+ this->supported_versions[this->num_supported_versions].ver = 300;
+ this->supported_versions[this->num_supported_versions].es = true;
+ this->num_supported_versions++;
+ }
+ if (_mesa_is_gles31(ctx)) {
+ this->supported_versions[this->num_supported_versions].ver = 310;
+ this->supported_versions[this->num_supported_versions].es = true;
+ this->num_supported_versions++;
+ }
+
+ /* Create a string for use in error messages to tell the user which GLSL
+ * versions are supported.
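+ * The result reads something like "1.10, 1.20, 1.30, and 1.00 ES" (illustrative).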
+ */
+ char *supported = ralloc_strdup(this, "");
+ for (unsigned i = 0; i < this->num_supported_versions; i++) {
+ unsigned ver = this->supported_versions[i].ver;
+ const char *const prefix = (i == 0)
+ ? ""
+ : ((i == this->num_supported_versions - 1) ? ", and " : ", ");
+ const char *const suffix = (this->supported_versions[i].es) ? " ES" : "";
+
+ ralloc_asprintf_append(& supported, "%s%u.%02u%s",
+ prefix,
+ ver / 100, ver % 100,
+ suffix);
+ }
+
+ this->supported_version_string = supported;
+
+ if (ctx->Const.ForceGLSLExtensionsWarn)
+ _mesa_glsl_process_extension("all", NULL, "warn", NULL, this);
+
+ this->default_uniform_qualifier = new(this) ast_type_qualifier();
+ this->default_uniform_qualifier->flags.q.shared = 1;
+ this->default_uniform_qualifier->flags.q.column_major = 1;
+ this->default_uniform_qualifier->is_default_qualifier = true;
+
+ this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+ this->default_shader_storage_qualifier->flags.q.shared = 1;
+ this->default_shader_storage_qualifier->flags.q.column_major = 1;
+ this->default_shader_storage_qualifier->is_default_qualifier = true;
+
+ this->fs_uses_gl_fragcoord = false;
+ this->fs_redeclares_gl_fragcoord = false;
+ this->fs_origin_upper_left = false;
+ this->fs_pixel_center_integer = false;
+ this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false;
+
+ this->gs_input_prim_type_specified = false;
+ this->tcs_output_vertices_specified = false;
+ this->gs_input_size = 0;
+ this->in_qualifier = new(this) ast_type_qualifier();
+ this->out_qualifier = new(this) ast_type_qualifier();
+ this->fs_early_fragment_tests = false;
+ memset(this->atomic_counter_offsets, 0,
+ sizeof(this->atomic_counter_offsets));
+ this->allow_extension_directive_midshader =
+ ctx->Const.AllowGLSLExtensionDirectiveMidShader;
+ }
+
+ /**
+ * Determine whether the current GLSL version is sufficiently high to support
+ * a certain feature, and generate an error message if it isn't.
+ *
+ * \param required_glsl_version and \c required_glsl_es_version are
+ * interpreted as they are in _mesa_glsl_parse_state::is_version().
+ *
+ * \param locp is the parser location where the error should be reported.
+ *
+ * \param fmt (and additional arguments) constitute a printf-style error
+ * message to report if the version check fails. Information about the
+ * current and required GLSL versions will be appended. So, for example, if
+ * the GLSL version being compiled is 1.20, and check_version(130, 300, locp,
+ * "foo unsupported") is called, the error message will be "foo unsupported in
+ * GLSL 1.20 (GLSL 1.30 or GLSL 3.00 ES required)".
+ */
+ bool
+ _mesa_glsl_parse_state::check_version(unsigned required_glsl_version,
+ unsigned required_glsl_es_version,
+ YYLTYPE *locp, const char *fmt, ...)
+ {
+ if (this->is_version(required_glsl_version, required_glsl_es_version))
+ return true;
+
+ va_list args;
+ va_start(args, fmt);
+ char *problem = ralloc_vasprintf(this, fmt, args);
+ va_end(args);
+ const char *glsl_version_string
+ = glsl_compute_version_string(this, false, required_glsl_version);
+ const char *glsl_es_version_string
+ = glsl_compute_version_string(this, true, required_glsl_es_version);
+ const char *requirement_string = "";
+ if (required_glsl_version && required_glsl_es_version) {
+ requirement_string = ralloc_asprintf(this, " (%s or %s required)",
+ glsl_version_string,
+ glsl_es_version_string);
+ } else if (required_glsl_version) {
+ requirement_string = ralloc_asprintf(this, " (%s required)",
+ glsl_version_string);
+ } else if (required_glsl_es_version) {
+ requirement_string = ralloc_asprintf(this, " (%s required)",
+ glsl_es_version_string);
+ }
+ _mesa_glsl_error(locp, this, "%s in %s%s",
+ problem, this->get_version_string(),
+ requirement_string);
+
+ return false;
+ }
+
+ /**
+ * Process a GLSL #version directive.
+ *
+ * \param version is the integer that follows the #version token.
+ *
+ * \param ident is a string identifier that follows the integer, if any is
+ * present. Otherwise NULL.
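+ *
+ * For example, "#version 300 es" reaches this function with version = 300
+ * and ident = "es", while "#version 120" arrives with ident = NULL.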
+ */
+ void
+ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
+ const char *ident)
+ {
+ bool es_token_present = false;
+ if (ident) {
+ if (strcmp(ident, "es") == 0) {
+ es_token_present = true;
+ } else if (version >= 150) {
+ if (strcmp(ident, "core") == 0) {
+ /* Accept the token. There's no need to record that this is
+ * a core profile shader since that's the only profile we support.
+ */
+ } else if (strcmp(ident, "compatibility") == 0) {
+ _mesa_glsl_error(locp, this,
+ "the compatibility profile is not supported");
+ } else {
+ _mesa_glsl_error(locp, this,
+ "\"%s\" is not a valid shading language profile; "
+ "if present, it must be \"core\"", ident);
+ }
+ } else {
+ _mesa_glsl_error(locp, this,
+ "illegal text following version number");
+ }
+ }
+
+ this->es_shader = es_token_present;
+ if (version == 100) {
+ if (es_token_present) {
+ _mesa_glsl_error(locp, this,
+ "GLSL 1.00 ES should be selected using "
+ "`#version 100'");
+ } else {
+ this->es_shader = true;
+ }
+ }
+
+ if (this->es_shader) {
+ this->ARB_texture_rectangle_enable = false;
+ }
+
+ if (this->forced_language_version)
+ this->language_version = this->forced_language_version;
+ else
+ this->language_version = version;
+
+ bool supported = false;
+ for (unsigned i = 0; i < this->num_supported_versions; i++) {
+ if (this->supported_versions[i].ver == this->language_version
+ && this->supported_versions[i].es == this->es_shader) {
+ supported = true;
+ break;
+ }
+ }
+
+ if (!supported) {
+ _mesa_glsl_error(locp, this, "%s is not supported. "
+ "Supported versions are: %s",
+ this->get_version_string(),
+ this->supported_version_string);
+
+ /* On exit, the language_version must be set to a valid value.
+ * Later calls to _mesa_glsl_initialize_types will misbehave if
+ * the version is invalid.
+ */
+ switch (this->ctx->API) {
+ case API_OPENGL_COMPAT:
+ case API_OPENGL_CORE:
+ this->language_version = this->ctx->Const.GLSLVersion;
+ break;
+
+ case API_OPENGLES:
+ assert(!"Should not get here.");
+ /* FALLTHROUGH */
+
+ case API_OPENGLES2:
+ this->language_version = 100;
+ break;
+ }
+ }
+ }
+
+
+ /* This helper function appends the given message to the shader's info
+ * log and reports it via GL_ARB_debug_output. Per that extension, 'type'
+ * is one of the enum values classifying the message; the message ID is
+ * filled in when the message is forwarded to _mesa_shader_debug(). */
+ static void
+ _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
+ GLenum type, const char *fmt, va_list ap)
+ {
+ bool error = (type == MESA_DEBUG_TYPE_ERROR);
+ GLuint msg_id = 0;
+
+ assert(state->info_log != NULL);
+
+ /* Get the offset that the new message will be written to. */
+ int msg_offset = strlen(state->info_log);
+
+ ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ",
+ locp->source,
+ locp->first_line,
+ locp->first_column,
+ error ? "error" : "warning");
+ ralloc_vasprintf_append(&state->info_log, fmt, ap);
+
+ const char *const msg = &state->info_log[msg_offset];
+ struct gl_context *ctx = state->ctx;
+
+ /* Report the error via GL_ARB_debug_output. */
+ _mesa_shader_debug(ctx, type, &msg_id, msg);
+
+ ralloc_strcat(&state->info_log, "\n");
+ }
+
+ void
+ _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state,
+ const char *fmt, ...)
+ {
+ va_list ap;
+
+ state->error = true;
+
+ va_start(ap, fmt);
+ _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_ERROR, fmt, ap);
+ va_end(ap);
+ }
+
+
+ void
+ _mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
+ const char *fmt, ...)
+ {
+ va_list ap;
+
+ va_start(ap, fmt);
+ _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_OTHER, fmt, ap);
+ va_end(ap);
+ }
+
+
+ /**
+ * Enum representing the possible behaviors that can be specified in
+ * an #extension directive.
+ */
+ enum ext_behavior {
+ extension_disable,
+ extension_enable,
+ extension_require,
+ extension_warn
+ };
+
+ /**
+ * Element type for _mesa_glsl_supported_extensions
+ */
+ struct _mesa_glsl_extension {
+ /**
+ * Name of the extension when referred to in a GLSL extension
+ * statement
+ */
+ const char *name;
+
+ /** True if this extension is available to desktop GL shaders */
+ bool avail_in_GL;
+
+ /** True if this extension is available to GLES shaders */
+ bool avail_in_ES;
+
+ /**
+ * Flag in the gl_extensions struct indicating whether this
+ * extension is supported by the driver, or
+ * &gl_extensions::dummy_true if supported by all drivers.
+ *
+ * Note: the type (GLboolean gl_extensions::*) is a "pointer to
+ * member" type, the type-safe alternative to the "offsetof" macro.
+ * In a nutshell:
+ *
+ * - foo bar::* p declares p to be an "offset" to a field of type
+ * foo that exists within struct bar
+ * - &bar::baz computes the "offset" of field baz within struct bar
+ * - x.*p accesses the field of x that exists at "offset" p
+ * - x->*p is equivalent to (*x).*p
+ */
+ const GLboolean gl_extensions::* supported_flag;
+
+ /**
+ * Flag in the _mesa_glsl_parse_state struct that should be set
+ * when this extension is enabled.
+ *
+ * See note in _mesa_glsl_extension::supported_flag about "pointer
+ * to member" types.
+ */
+ bool _mesa_glsl_parse_state::* enable_flag;
+
+ /**
+ * Flag in the _mesa_glsl_parse_state struct that should be set
+ * when the shader requests "warn" behavior for this extension.
+ *
+ * See note in _mesa_glsl_extension::supported_flag about "pointer
+ * to member" types.
+ */
+ bool _mesa_glsl_parse_state::* warn_flag;
+
+
+ bool compatible_with_state(const _mesa_glsl_parse_state *state) const;
+ void set_flags(_mesa_glsl_parse_state *state, ext_behavior behavior) const;
+ };
+
+ #define EXT(NAME, GL, ES, SUPPORTED_FLAG) \
+ { "GL_" #NAME, GL, ES, &gl_extensions::SUPPORTED_FLAG, \
+ &_mesa_glsl_parse_state::NAME##_enable, \
+ &_mesa_glsl_parse_state::NAME##_warn }
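+
+ /* As an illustration (not new behavior), the ARB_draw_buffers entry below
+ * expands to:
+ *
+ * { "GL_ARB_draw_buffers", true, false,
+ * &gl_extensions::dummy_true,
+ * &_mesa_glsl_parse_state::ARB_draw_buffers_enable,
+ * &_mesa_glsl_parse_state::ARB_draw_buffers_warn }
+ */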
+
+ /**
+ * Table of extensions that can be enabled/disabled within a shader,
+ * and the conditions under which they are supported.
+ */
+ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
+ /* API availability */
+ /* name GL ES supported flag */
+
+ /* ARB extensions go here, sorted alphabetically.
+ */
+ EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays),
+ EXT(ARB_compute_shader, true, false, ARB_compute_shader),
+ EXT(ARB_conservative_depth, true, false, ARB_conservative_depth),
+ EXT(ARB_derivative_control, true, false, ARB_derivative_control),
+ EXT(ARB_draw_buffers, true, false, dummy_true),
+ EXT(ARB_draw_instanced, true, false, ARB_draw_instanced),
+ EXT(ARB_enhanced_layouts, true, false, ARB_enhanced_layouts),
+ EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location),
+ EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location),
+ EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions),
+ EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport),
+ EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5),
+ EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64),
+ EXT(ARB_sample_shading, true, false, ARB_sample_shading),
+ EXT(ARB_separate_shader_objects, true, false, dummy_true),
+ EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters),
+ EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding),
+ EXT(ARB_shader_clock, true, false, ARB_shader_clock),
+ EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters),
+ EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store),
+ EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
+ EXT(ARB_shader_precision, true, false, ARB_shader_precision),
+ EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export),
+ EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object),
+ EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine),
+ EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples),
+ EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod),
+ EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack),
+ EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing),
+ EXT(ARB_tessellation_shader, true, false, ARB_tessellation_shader),
+ EXT(ARB_texture_cube_map_array, true, false, ARB_texture_cube_map_array),
+ EXT(ARB_texture_gather, true, false, ARB_texture_gather),
+ EXT(ARB_texture_multisample, true, false, ARB_texture_multisample),
+ EXT(ARB_texture_query_levels, true, false, ARB_texture_query_levels),
+ EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod),
+ EXT(ARB_texture_rectangle, true, false, dummy_true),
+ EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object),
+ EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit),
+ EXT(ARB_viewport_array, true, false, ARB_viewport_array),
+
+ /* KHR extensions go here, sorted alphabetically.
+ */
+
+ /* OES extensions go here, sorted alphabetically.
+ */
+ EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
+ EXT(OES_geometry_shader, false, true, OES_geometry_shader),
+ EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
+ EXT(OES_texture_3D, false, true, dummy_true),
+ EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
+
+ /* All other extensions go here, sorted alphabetically.
+ */
+ EXT(AMD_conservative_depth, true, false, ARB_conservative_depth),
+ EXT(AMD_shader_stencil_export, true, false, ARB_shader_stencil_export),
+ EXT(AMD_shader_trinary_minmax, true, false, dummy_true),
+ EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer),
+ EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index),
+ EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended),
+ EXT(EXT_draw_buffers, false, true, dummy_true),
+ EXT(EXT_separate_shader_objects, false, true, dummy_true),
+ EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix),
+ EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical),
+ EXT(EXT_texture_array, true, false, EXT_texture_array),
+ };
+
+ #undef EXT
+
+
+ /**
+ * Determine whether a given extension is compatible with the target,
+ * API, and extension information in the current parser state.
+ */
+ bool _mesa_glsl_extension::compatible_with_state(const _mesa_glsl_parse_state *
+ state) const
+ {
+ /* Check that this extension matches whether we are compiling
+ * for desktop GL or GLES.
+ */
+ if (state->es_shader) {
+ if (!this->avail_in_ES) return false;
+ } else {
+ if (!this->avail_in_GL) return false;
+ }
+
+ /* Check that this extension is supported by the OpenGL
+ * implementation.
+ *
+ * Note: the ->* operator indexes into state->extensions by the
+ * offset this->supported_flag. See
+ * _mesa_glsl_extension::supported_flag for more info.
+ */
+ return state->extensions->*(this->supported_flag);
+ }
+
+ /**
+ * Set the appropriate flags in the parser state to establish the
+ * given behavior for this extension.
+ */
+ void _mesa_glsl_extension::set_flags(_mesa_glsl_parse_state *state,
+ ext_behavior behavior) const
+ {
+ /* Note: the ->* operator indexes into state by the
+ * offsets this->enable_flag and this->warn_flag. See
+ * _mesa_glsl_extension::supported_flag for more info.
+ */
+ state->*(this->enable_flag) = (behavior != extension_disable);
+ state->*(this->warn_flag) = (behavior == extension_warn);
+ }
+
+ /**
+ * Find an extension by name in _mesa_glsl_supported_extensions. If
+ * the name is not found, return NULL.
+ */
+ static const _mesa_glsl_extension *find_extension(const char *name)
+ {
+ for (unsigned i = 0; i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) {
+ if (strcmp(name, _mesa_glsl_supported_extensions[i].name) == 0) {
+ return &_mesa_glsl_supported_extensions[i];
+ }
+ }
+ return NULL;
+ }
+
+
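+ /* Handle a "#extension <name> : <behavior>" directive. For example,
+ * "#extension GL_ARB_draw_buffers : enable" arrives with
+ * name = "GL_ARB_draw_buffers" and behavior_string = "enable". Returns
+ * false when the directive cannot be honored and an error has been
+ * reported.
+ */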
+ bool
+ _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp,
+ const char *behavior_string, YYLTYPE *behavior_locp,
+ _mesa_glsl_parse_state *state)
+ {
+ ext_behavior behavior;
+ if (strcmp(behavior_string, "warn") == 0) {
+ behavior = extension_warn;
+ } else if (strcmp(behavior_string, "require") == 0) {
+ behavior = extension_require;
+ } else if (strcmp(behavior_string, "enable") == 0) {
+ behavior = extension_enable;
+ } else if (strcmp(behavior_string, "disable") == 0) {
+ behavior = extension_disable;
+ } else {
+ _mesa_glsl_error(behavior_locp, state,
+ "unknown extension behavior `%s'",
+ behavior_string);
+ return false;
+ }
+
+ if (strcmp(name, "all") == 0) {
+ if ((behavior == extension_enable) || (behavior == extension_require)) {
+ _mesa_glsl_error(name_locp, state, "cannot %s all extensions",
+ (behavior == extension_enable)
+ ? "enable" : "require");
+ return false;
+ } else {
+ for (unsigned i = 0;
+ i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) {
+ const _mesa_glsl_extension *extension
+ = &_mesa_glsl_supported_extensions[i];
+ if (extension->compatible_with_state(state)) {
+ _mesa_glsl_supported_extensions[i].set_flags(state, behavior);
+ }
+ }
+ }
+ } else {
+ const _mesa_glsl_extension *extension = find_extension(name);
+ if (extension && extension->compatible_with_state(state)) {
+ extension->set_flags(state, behavior);
+ } else {
+ static const char fmt[] = "extension `%s' unsupported in %s shader";
+
+ if (behavior == extension_require) {
+ _mesa_glsl_error(name_locp, state, fmt,
+ name, _mesa_shader_stage_to_string(state->stage));
+ return false;
+ } else {
+ _mesa_glsl_warning(name_locp, state, fmt,
+ name, _mesa_shader_stage_to_string(state->stage));
+ }
+ }
+ }
+
+ return true;
+ }
+
+
+ /**
+ * Recurses through <type> and <expr> if <expr> is an aggregate initializer
+ * and sets <expr>'s <constructor_type> field to <type>. Gives later functions
+ * (process_array_constructor, et al) sufficient information to do type
+ * checking.
+ *
+ * Operates on assignments involving an aggregate initializer. E.g.,
+ *
+ * vec4 pos = {1.0, -1.0, 0.0, 1.0};
+ *
+ * or more ridiculously,
+ *
+ * struct S {
+ * vec4 v[2];
+ * };
+ *
+ * struct {
+ * S a[2], b;
+ * int c;
+ * } aggregate = {
+ * {
+ * {
+ * {
+ * {1.0, 2.0, 3.0, 4.0}, // a[0].v[0]
+ * {5.0, 6.0, 7.0, 8.0} // a[0].v[1]
+ * } // a[0].v
+ * }, // a[0]
+ * {
+ * {
+ * {1.0, 2.0, 3.0, 4.0}, // a[1].v[0]
+ * {5.0, 6.0, 7.0, 8.0} // a[1].v[1]
+ * } // a[1].v
+ * } // a[1]
+ * }, // a
+ * {
+ * {
+ * {1.0, 2.0, 3.0, 4.0}, // b.v[0]
+ * {5.0, 6.0, 7.0, 8.0} // b.v[1]
+ * } // b.v
+ * }, // b
+ * 4 // c
+ * };
+ *
+ * This pass is necessary because the right-hand side of <type> e = { ... }
+ * doesn't contain sufficient information to determine if the types match.
+ */
+ void
+ _mesa_ast_set_aggregate_type(const glsl_type *type,
+ ast_expression *expr)
+ {
+ ast_aggregate_initializer *ai = (ast_aggregate_initializer *)expr;
+ ai->constructor_type = type;
+
+ /* If the aggregate is an array, recursively set its elements' types. */
+ if (type->is_array()) {
+ /* Each array element has the type type->fields.array.
+ *
+ * E.g., if <type> is struct S[2], we want to set each element's type to
+ * struct S.
+ */
+ for (exec_node *expr_node = ai->expressions.head;
+ !expr_node->is_tail_sentinel();
+ expr_node = expr_node->next) {
+ ast_expression *expr = exec_node_data(ast_expression, expr_node,
+ link);
+
+ if (expr->oper == ast_aggregate)
+ _mesa_ast_set_aggregate_type(type->fields.array, expr);
+ }
+
+ /* If the aggregate is a struct, recursively set its fields' types. */
+ } else if (type->is_record()) {
+ exec_node *expr_node = ai->expressions.head;
+
+ /* Iterate through the struct's fields. */
+ for (unsigned i = 0; !expr_node->is_tail_sentinel() && i < type->length;
+ i++, expr_node = expr_node->next) {
+ ast_expression *expr = exec_node_data(ast_expression, expr_node,
+ link);
+
+ if (expr->oper == ast_aggregate) {
+ _mesa_ast_set_aggregate_type(type->fields.structure[i].type, expr);
+ }
+ }
+ /* If the aggregate is a matrix, set its columns' types. */
+ } else if (type->is_matrix()) {
+ for (exec_node *expr_node = ai->expressions.head;
+ !expr_node->is_tail_sentinel();
+ expr_node = expr_node->next) {
+ ast_expression *expr = exec_node_data(ast_expression, expr_node,
+ link);
+
+ if (expr->oper == ast_aggregate)
+ _mesa_ast_set_aggregate_type(type->column_type(), expr);
+ }
+ }
+ }
+
+ void
+ _mesa_ast_process_interface_block(YYLTYPE *locp,
+ _mesa_glsl_parse_state *state,
+ ast_interface_block *const block,
+ const struct ast_type_qualifier &q)
+ {
+ if (q.flags.q.buffer) {
+ if (!state->has_shader_storage_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ } else if (state->ARB_shader_storage_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ }
+ } else if (q.flags.q.uniform) {
+ if (!state->has_uniform_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ } else if (state->ARB_uniform_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ }
+ } else {
+ if (state->es_shader || state->language_version < 150) {
+ _mesa_glsl_error(locp, state,
+ "#version 150 required for using "
+ "interface blocks");
+ }
+ }
+
+ /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+ * "It is illegal to have an input block in a vertex shader
+ * or an output block in a fragment shader"
+ */
+ if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+ _mesa_glsl_error(locp, state,
+ "`in' interface block is not allowed for "
+ "a vertex shader");
+ } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+ _mesa_glsl_error(locp, state,
+ "`out' interface block is not allowed for "
+ "a fragment shader");
+ }
+
+ /* Since block arrays require names, and both features are added in
+ * the same language versions, we don't have to explicitly
+ * version-check both things.
+ */
+ if (block->instance_name != NULL) {
+ state->check_version(150, 300, locp, "interface blocks with "
+ "an instance name are not allowed");
+ }
+
+ uint64_t interface_type_mask;
+ struct ast_type_qualifier temp_type_qualifier;
+
+ /* Get a bitmask containing only the in/out/uniform/buffer
+ * flags, allowing us to ignore other irrelevant flags like
+ * interpolation qualifiers.
+ */
+ temp_type_qualifier.flags.i = 0;
+ temp_type_qualifier.flags.q.uniform = true;
+ temp_type_qualifier.flags.q.in = true;
+ temp_type_qualifier.flags.q.out = true;
+ temp_type_qualifier.flags.q.buffer = true;
+ interface_type_mask = temp_type_qualifier.flags.i;
+
+ /* Get the block's interface qualifier. The interface_qualifier
+ * production rule guarantees that only one bit will be set (and
+ * it will be in/out/uniform).
+ */
+ uint64_t block_interface_qualifier = q.flags.i;
+
+ block->layout.flags.i |= block_interface_qualifier;
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ state->has_explicit_attrib_stream()) {
+ /* Assign global layout's stream value. */
+ block->layout.flags.q.stream = 1;
+ block->layout.flags.q.explicit_stream = 0;
+ block->layout.stream = state->out_qualifier->stream;
+ }
+
+ foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+ ast_type_qualifier& qualifier = member->type->qualifier;
+ if ((qualifier.flags.i & interface_type_mask) == 0) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If no optional qualifier is used in a member declaration, the
+ * qualifier of the variable is just in, out, or uniform as declared
+ * by interface-qualifier."
+ */
+ qualifier.flags.i |= block_interface_qualifier;
+ } else if ((qualifier.flags.i & interface_type_mask) !=
+ block_interface_qualifier) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If optional qualifiers are used, they can include interpolation
+ * and storage qualifiers and they must declare an input, output,
+ * or uniform variable consistent with the interface qualifier of
+ * the block."
+ */
+ _mesa_glsl_error(locp, state,
+ "uniform/in/out qualifier on "
+ "interface block member does not match "
+ "the interface block");
+ }
+
+ /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+ *
+ * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+ * outputs."
+ *
+ * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":
+ *
+ * "Only variables output from a shader can be candidates for
+ * invariance."
+ *
+ * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+ *
+ * "If optional qualifiers are used, they can include interpolation
+ * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+ * and they must declare an input, output, or uniform member
+ * consistent with the interface qualifier of the block"
+ */
+ if (qualifier.flags.q.invariant)
+ _mesa_glsl_error(locp, state,
+ "invariant qualifiers cannot be used "
+ "with interface blocks members");
+ }
+ }
+
+ void
+ _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
+ {
+ if (q->flags.q.subroutine)
+ printf("subroutine ");
+
+ if (q->flags.q.subroutine_def) {
+ printf("subroutine (");
+ q->subroutine_list->print();
+ printf(")");
+ }
+
+ if (q->flags.q.constant)
+ printf("const ");
+
+ if (q->flags.q.invariant)
+ printf("invariant ");
+
+ if (q->flags.q.attribute)
+ printf("attribute ");
+
+ if (q->flags.q.varying)
+ printf("varying ");
+
+ if (q->flags.q.in && q->flags.q.out)
+ printf("inout ");
+ else {
+ if (q->flags.q.in)
+ printf("in ");
+
+ if (q->flags.q.out)
+ printf("out ");
+ }
+
+ if (q->flags.q.centroid)
+ printf("centroid ");
+ if (q->flags.q.sample)
+ printf("sample ");
+ if (q->flags.q.patch)
+ printf("patch ");
+ if (q->flags.q.uniform)
+ printf("uniform ");
+ if (q->flags.q.buffer)
+ printf("buffer ");
+ if (q->flags.q.smooth)
+ printf("smooth ");
+ if (q->flags.q.flat)
+ printf("flat ");
+ if (q->flags.q.noperspective)
+ printf("noperspective ");
+ }
+
+
+ void
+ ast_node::print(void) const
+ {
+ printf("unhandled node ");
+ }
+
+
+ ast_node::ast_node(void)
+ {
+ this->location.source = 0;
+ this->location.first_line = 0;
+ this->location.first_column = 0;
+ this->location.last_line = 0;
+ this->location.last_column = 0;
+ }
+
+
+ static void
+ ast_opt_array_dimensions_print(const ast_array_specifier *array_specifier)
+ {
+ if (array_specifier)
+ array_specifier->print();
+ }
+
+
+ void
+ ast_compound_statement::print(void) const
+ {
+ printf("{\n");
+
+ foreach_list_typed(ast_node, ast, link, &this->statements) {
+ ast->print();
+ }
+
+ printf("}\n");
+ }
+
+
+ ast_compound_statement::ast_compound_statement(int new_scope,
+ ast_node *statements)
+ {
+ this->new_scope = new_scope;
+
+ if (statements != NULL) {
+ this->statements.push_degenerate_list_at_head(&statements->link);
+ }
+ }
+
+
+ void
+ ast_expression::print(void) const
+ {
+ switch (oper) {
+ case ast_assign:
+ case ast_mul_assign:
+ case ast_div_assign:
+ case ast_mod_assign:
+ case ast_add_assign:
+ case ast_sub_assign:
+ case ast_ls_assign:
+ case ast_rs_assign:
+ case ast_and_assign:
+ case ast_xor_assign:
+ case ast_or_assign:
+ subexpressions[0]->print();
+ printf("%s ", operator_string(oper));
+ subexpressions[1]->print();
+ break;
+
+ case ast_field_selection:
+ subexpressions[0]->print();
+ printf(". %s ", primary_expression.identifier);
+ break;
+
+ case ast_plus:
+ case ast_neg:
+ case ast_bit_not:
+ case ast_logic_not:
+ case ast_pre_inc:
+ case ast_pre_dec:
+ printf("%s ", operator_string(oper));
+ subexpressions[0]->print();
+ break;
+
+ case ast_post_inc:
+ case ast_post_dec:
+ subexpressions[0]->print();
+ printf("%s ", operator_string(oper));
+ break;
+
+ case ast_conditional:
+ subexpressions[0]->print();
+ printf("? ");
+ subexpressions[1]->print();
+ printf(": ");
+ subexpressions[2]->print();
+ break;
+
+ case ast_array_index:
+ subexpressions[0]->print();
+ printf("[ ");
+ subexpressions[1]->print();
+ printf("] ");
+ break;
+
+ case ast_function_call: {
+ subexpressions[0]->print();
+ printf("( ");
+
+ foreach_list_typed (ast_node, ast, link, &this->expressions) {
+ if (&ast->link != this->expressions.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+
+ printf(") ");
+ break;
+ }
+
+ case ast_identifier:
+ printf("%s ", primary_expression.identifier);
+ break;
+
+ case ast_int_constant:
+ printf("%d ", primary_expression.int_constant);
+ break;
+
+ case ast_uint_constant:
+ printf("%u ", primary_expression.uint_constant);
+ break;
+
+ case ast_float_constant:
+ printf("%f ", primary_expression.float_constant);
+ break;
+
+ case ast_double_constant:
+ printf("%f ", primary_expression.double_constant);
+ break;
+
+ case ast_bool_constant:
+ printf("%s ",
+ primary_expression.bool_constant
+ ? "true" : "false");
+ break;
+
+ case ast_sequence: {
+ printf("( ");
+ foreach_list_typed (ast_node, ast, link, & this->expressions) {
+ if (&ast->link != this->expressions.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+ printf(") ");
+ break;
+ }
+
+ case ast_aggregate: {
+ printf("{ ");
+ foreach_list_typed (ast_node, ast, link, & this->expressions) {
+ if (&ast->link != this->expressions.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+ printf("} ");
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ ast_expression::ast_expression(int oper,
+ ast_expression *ex0,
+ ast_expression *ex1,
+ ast_expression *ex2) :
+ primary_expression()
+ {
+ this->oper = ast_operators(oper);
+ this->subexpressions[0] = ex0;
+ this->subexpressions[1] = ex1;
+ this->subexpressions[2] = ex2;
+ this->non_lvalue_description = NULL;
+ }
+
+
+ void
+ ast_expression_statement::print(void) const
+ {
+ if (expression)
+ expression->print();
+
+ printf("; ");
+ }
+
+
+ ast_expression_statement::ast_expression_statement(ast_expression *ex) :
+ expression(ex)
+ {
+ /* empty */
+ }
+
+
+ void
+ ast_function::print(void) const
+ {
+ return_type->print();
+ printf(" %s (", identifier);
+
+ foreach_list_typed(ast_node, ast, link, & this->parameters) {
+ ast->print();
+ }
+
+ printf(")");
+ }
+
+
+ ast_function::ast_function(void)
+ : return_type(NULL), identifier(NULL), is_definition(false),
+ signature(NULL)
+ {
+ /* empty */
+ }
+
+
+ void
+ ast_fully_specified_type::print(void) const
+ {
+ _mesa_ast_type_qualifier_print(& qualifier);
+ specifier->print();
+ }
+
+
+ void
+ ast_parameter_declarator::print(void) const
+ {
+ type->print();
+ if (identifier)
+ printf("%s ", identifier);
+ ast_opt_array_dimensions_print(array_specifier);
+ }
+
+
+ void
+ ast_function_definition::print(void) const
+ {
+ prototype->print();
+ body->print();
+ }
+
+
+ void
+ ast_declaration::print(void) const
+ {
+ printf("%s ", identifier);
+ ast_opt_array_dimensions_print(array_specifier);
+
+ if (initializer) {
+ printf("= ");
+ initializer->print();
+ }
+ }
+
+
+ ast_declaration::ast_declaration(const char *identifier,
+ ast_array_specifier *array_specifier,
+ ast_expression *initializer)
+ {
+ this->identifier = identifier;
+ this->array_specifier = array_specifier;
+ this->initializer = initializer;
+ }
+
+
+ void
+ ast_declarator_list::print(void) const
+ {
+ assert(type || invariant);
+
+ if (type)
+ type->print();
+ else if (invariant)
+ printf("invariant ");
+ else
+ printf("precise ");
+
+ foreach_list_typed (ast_node, ast, link, & this->declarations) {
+ if (&ast->link != this->declarations.get_head())
+ printf(", ");
+
+ ast->print();
+ }
+
+ printf("; ");
+ }
+
+
+ ast_declarator_list::ast_declarator_list(ast_fully_specified_type *type)
+ {
+ this->type = type;
+ this->invariant = false;
+ this->precise = false;
+ }
+
+ void
+ ast_jump_statement::print(void) const
+ {
+ switch (mode) {
+ case ast_continue:
+ printf("continue; ");
+ break;
+ case ast_break:
+ printf("break; ");
+ break;
+ case ast_return:
+ printf("return ");
+ if (opt_return_value)
+ opt_return_value->print();
+
+ printf("; ");
+ break;
+ case ast_discard:
+ printf("discard; ");
+ break;
+ }
+ }
+
+
+ ast_jump_statement::ast_jump_statement(int mode, ast_expression *return_value)
+ : opt_return_value(NULL)
+ {
+ this->mode = ast_jump_modes(mode);
+
+ if (mode == ast_return)
+ opt_return_value = return_value;
+ }
+
+
+ void
+ ast_selection_statement::print(void) const
+ {
+ printf("if ( ");
+ condition->print();
+ printf(") ");
+
+ then_statement->print();
+
+ if (else_statement) {
+ printf("else ");
+ else_statement->print();
+ }
+ }
+
+
+ ast_selection_statement::ast_selection_statement(ast_expression *condition,
+ ast_node *then_statement,
+ ast_node *else_statement)
+ {
+ this->condition = condition;
+ this->then_statement = then_statement;
+ this->else_statement = else_statement;
+ }
+
+
+ void
+ ast_switch_statement::print(void) const
+ {
+ printf("switch ( ");
+ test_expression->print();
+ printf(") ");
+
+ body->print();
+ }
+
+
+ ast_switch_statement::ast_switch_statement(ast_expression *test_expression,
+ ast_node *body)
+ {
+ this->test_expression = test_expression;
+ this->body = body;
+ }
+
+
+ void
+ ast_switch_body::print(void) const
+ {
+ printf("{\n");
+ if (stmts != NULL) {
+ stmts->print();
+ }
+ printf("}\n");
+ }
+
+
+ ast_switch_body::ast_switch_body(ast_case_statement_list *stmts)
+ {
+ this->stmts = stmts;
+ }
+
+
+ void ast_case_label::print(void) const
+ {
+ if (test_value != NULL) {
+ printf("case ");
+ test_value->print();
+ printf(": ");
+ } else {
+ printf("default: ");
+ }
+ }
+
+
+ ast_case_label::ast_case_label(ast_expression *test_value)
+ {
+ this->test_value = test_value;
+ }
+
+
+ void ast_case_label_list::print(void) const
+ {
+ foreach_list_typed(ast_node, ast, link, & this->labels) {
+ ast->print();
+ }
+ printf("\n");
+ }
+
+
+ ast_case_label_list::ast_case_label_list(void)
+ {
+ }
+
+
+ void ast_case_statement::print(void) const
+ {
+ labels->print();
+ foreach_list_typed(ast_node, ast, link, & this->stmts) {
+ ast->print();
+ printf("\n");
+ }
+ }
+
+
+ ast_case_statement::ast_case_statement(ast_case_label_list *labels)
+ {
+ this->labels = labels;
+ }
+
+
+ void ast_case_statement_list::print(void) const
+ {
+ foreach_list_typed(ast_node, ast, link, & this->cases) {
+ ast->print();
+ }
+ }
+
+
+ ast_case_statement_list::ast_case_statement_list(void)
+ {
+ }
+
+
+ void
+ ast_iteration_statement::print(void) const
+ {
+ switch (mode) {
+ case ast_for:
+ printf("for( ");
+ if (init_statement)
+ init_statement->print();
+ printf("; ");
+
+ if (condition)
+ condition->print();
+ printf("; ");
+
+ if (rest_expression)
+ rest_expression->print();
+ printf(") ");
+
+ body->print();
+ break;
+
+ case ast_while:
+ printf("while ( ");
+ if (condition)
+ condition->print();
+ printf(") ");
+ body->print();
+ break;
+
+ case ast_do_while:
+ printf("do ");
+ body->print();
+ printf("while ( ");
+ if (condition)
+ condition->print();
+ printf("); ");
+ break;
+ }
+ }
+
+
+ ast_iteration_statement::ast_iteration_statement(int mode,
+ ast_node *init,
+ ast_node *condition,
+ ast_expression *rest_expression,
+ ast_node *body)
+ {
+ this->mode = ast_iteration_modes(mode);
+ this->init_statement = init;
+ this->condition = condition;
+ this->rest_expression = rest_expression;
+ this->body = body;
+ }
+
+
+ void
+ ast_struct_specifier::print(void) const
+ {
+ printf("struct %s { ", name);
+ foreach_list_typed(ast_node, ast, link, &this->declarations) {
+ ast->print();
+ }
+ printf("} ");
+ }
+
+
+ ast_struct_specifier::ast_struct_specifier(const char *identifier,
+ ast_declarator_list *declarator_list)
+ {
+ if (identifier == NULL) {
+ static mtx_t mutex = _MTX_INITIALIZER_NP;
+ static unsigned anon_count = 1;
+ unsigned count;
+
+ mtx_lock(&mutex);
+ count = anon_count++;
+ mtx_unlock(&mutex);
+
+ identifier = ralloc_asprintf(this, "#anon_struct_%04x", count);
+ }
+ name = identifier;
+ this->declarations.push_degenerate_list_at_head(&declarator_list->link);
+ is_declaration = true;
+ }
+
+ void ast_subroutine_list::print(void) const
+ {
+ foreach_list_typed (ast_node, ast, link, & this->declarations) {
+ if (&ast->link != this->declarations.get_head())
+ printf(", ");
+ ast->print();
+ }
+ }
+
+ static void
+ set_shader_inout_layout(struct gl_shader *shader,
+ struct _mesa_glsl_parse_state *state)
+ {
+ /* Should have been prevented by the parser. */
+ if (shader->Stage == MESA_SHADER_TESS_CTRL) {
+ assert(!state->in_qualifier->flags.i);
+ } else if (shader->Stage == MESA_SHADER_TESS_EVAL) {
+ assert(!state->out_qualifier->flags.i);
+ } else if (shader->Stage != MESA_SHADER_GEOMETRY) {
+ assert(!state->in_qualifier->flags.i);
+ assert(!state->out_qualifier->flags.i);
+ }
+
+ if (shader->Stage != MESA_SHADER_COMPUTE) {
+ /* Should have been prevented by the parser. */
+ assert(!state->cs_input_local_size_specified);
+ }
+
+ if (shader->Stage != MESA_SHADER_FRAGMENT) {
+ /* Should have been prevented by the parser. */
+ assert(!state->fs_uses_gl_fragcoord);
+ assert(!state->fs_redeclares_gl_fragcoord);
+ assert(!state->fs_pixel_center_integer);
+ assert(!state->fs_origin_upper_left);
+ assert(!state->fs_early_fragment_tests);
+ }
+
+ switch (shader->Stage) {
+ case MESA_SHADER_TESS_CTRL:
+ shader->TessCtrl.VerticesOut = 0;
+ if (state->tcs_output_vertices_specified) {
+ unsigned vertices;
+ if (state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &vertices,
+ false)) {
+
+ YYLTYPE loc = state->out_qualifier->vertices->get_location();
+ if (vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", vertices);
+ }
+ shader->TessCtrl.VerticesOut = vertices;
+ }
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
+ if (state->in_qualifier->flags.q.prim_type)
+ shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type;
+
+ shader->TessEval.Spacing = 0;
+ if (state->in_qualifier->flags.q.vertex_spacing)
+ shader->TessEval.Spacing = state->in_qualifier->vertex_spacing;
+
+ shader->TessEval.VertexOrder = 0;
+ if (state->in_qualifier->flags.q.ordering)
+ shader->TessEval.VertexOrder = state->in_qualifier->ordering;
+
+ shader->TessEval.PointMode = -1;
+ if (state->in_qualifier->flags.q.point_mode)
+ shader->TessEval.PointMode = state->in_qualifier->point_mode;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ shader->Geom.VerticesOut = 0;
+ if (state->out_qualifier->flags.q.max_vertices) {
+ unsigned qual_max_vertices;
+ if (state->out_qualifier->max_vertices->
+ process_qualifier_constant(state, "max_vertices",
+ &qual_max_vertices, true)) {
+ shader->Geom.VerticesOut = qual_max_vertices;
+ }
+ }
+
+ if (state->gs_input_prim_type_specified) {
+ shader->Geom.InputType = state->in_qualifier->prim_type;
+ } else {
+ shader->Geom.InputType = PRIM_UNKNOWN;
+ }
+
+ if (state->out_qualifier->flags.q.prim_type) {
+ shader->Geom.OutputType = state->out_qualifier->prim_type;
+ } else {
+ shader->Geom.OutputType = PRIM_UNKNOWN;
+ }
+
+ shader->Geom.Invocations = 0;
+ if (state->in_qualifier->flags.q.invocations) {
+ unsigned invocations;
+ if (state->in_qualifier->invocations->
+ process_qualifier_constant(state, "invocations",
+ &invocations, false)) {
+
+ YYLTYPE loc = state->in_qualifier->invocations->get_location();
+ if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) {
+ _mesa_glsl_error(&loc, state,
+ "invocations (%d) exceeds "
+ "GL_MAX_GEOMETRY_SHADER_INVOCATIONS",
+ invocations);
+ }
+ shader->Geom.Invocations = invocations;
+ }
+ }
+ break;
+
+ case MESA_SHADER_COMPUTE:
+ if (state->cs_input_local_size_specified) {
+ for (int i = 0; i < 3; i++)
+ shader->Comp.LocalSize[i] = state->cs_input_local_size[i];
+ } else {
+ for (int i = 0; i < 3; i++)
+ shader->Comp.LocalSize[i] = 0;
+ }
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord;
+ shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord;
+ shader->pixel_center_integer = state->fs_pixel_center_integer;
+ shader->origin_upper_left = state->fs_origin_upper_left;
+ shader->ARB_fragment_coord_conventions_enable =
+ state->ARB_fragment_coord_conventions_enable;
+ shader->EarlyFragmentTests = state->fs_early_fragment_tests;
+ break;
+
+ default:
+ /* Nothing to do. */
+ break;
+ }
+ }
+
+ extern "C" {
+
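+ /* Compile a single GLSL shader: preprocess the source, lex and parse it
+ * into an AST (optionally dumped), lower the AST to IR, validate and run
+ * the common optimizations, then rebuild the shader's symbol table from
+ * the IR that survives.
+ */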
+ void
+ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
+ bool dump_ast, bool dump_hir)
+ {
+ struct _mesa_glsl_parse_state *state =
+ new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader);
+ const char *source = shader->Source;
+
+ if (ctx->Const.GenerateTemporaryNames)
+ (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names,
+ false, true);
+
+ state->error = glcpp_preprocess(state, &source, &state->info_log,
+ &ctx->Extensions, ctx);
+
+ if (!state->error) {
+ _mesa_glsl_lexer_ctor(state, source);
+ _mesa_glsl_parse(state);
+ _mesa_glsl_lexer_dtor(state);
+ }
+
+ if (dump_ast) {
+ foreach_list_typed(ast_node, ast, link, &state->translation_unit) {
+ ast->print();
+ }
+ printf("\n\n");
+ }
+
+ ralloc_free(shader->ir);
+ shader->ir = new(shader) exec_list;
+ if (!state->error && !state->translation_unit.is_empty())
+ _mesa_ast_to_hir(shader->ir, state);
+
+ if (!state->error) {
+ validate_ir_tree(shader->ir);
+
+ /* Print out the unoptimized IR. */
+ if (dump_hir) {
+ _mesa_print_ir(stdout, shader->ir, state);
+ }
+ }
+
+
+ if (!state->error && !shader->ir->is_empty()) {
+ struct gl_shader_compiler_options *options =
+ &ctx->Const.ShaderCompilerOptions[shader->Stage];
+
+ lower_subroutine(shader->ir, state);
+ /* Do some optimization at compile time to reduce shader IR size
+ * and reduce later work if the same shader is linked multiple times
+ */
+ while (do_common_optimization(shader->ir, false, false, options,
+ ctx->Const.NativeIntegers))
+ ;
+
+ validate_ir_tree(shader->ir);
+
+ enum ir_variable_mode other;
+ switch (shader->Stage) {
+ case MESA_SHADER_VERTEX:
+ other = ir_var_shader_in;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ other = ir_var_shader_out;
+ break;
+ default:
+ /* Something invalid to ensure optimize_dead_builtin_uniforms
+ * doesn't remove anything other than uniforms or constants.
+ */
+ other = ir_var_mode_count;
+ break;
+ }
+
+ optimize_dead_builtin_variables(shader->ir, other);
+
+ validate_ir_tree(shader->ir);
+ }
+
+ if (shader->InfoLog)
+ ralloc_free(shader->InfoLog);
+
+ if (!state->error)
+ set_shader_inout_layout(shader, state);
+
+ shader->symbols = new(shader->ir) glsl_symbol_table;
+ shader->CompileStatus = !state->error;
+ shader->InfoLog = state->info_log;
+ shader->Version = state->language_version;
+ shader->IsES = state->es_shader;
+ shader->uses_builtin_functions = state->uses_builtin_functions;
+
+ /* Retain any live IR, but trash the rest. */
+ reparent_ir(shader->ir, shader->ir);
+
+ /* Destroy the symbol table. Create a new symbol table that contains only
+ * the variables and functions that still exist in the IR. The symbol
+ * table will be used later during linking.
+ *
+ * There must NOT be any freed objects still referenced by the symbol
+ * table. That could cause the linker to dereference freed memory.
+ *
+ * We don't have to worry about types or interface-types here because those
+ * are fly-weights that are looked up by glsl_type.
+ */
+ foreach_in_list (ir_instruction, ir, shader->ir) {
+ switch (ir->ir_type) {
+ case ir_type_function:
+ shader->symbols->add_function((ir_function *) ir);
+ break;
+ case ir_type_variable: {
+ ir_variable *const var = (ir_variable *) ir;
+
+ if (var->data.mode != ir_var_temporary)
+ shader->symbols->add_variable(var);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ _mesa_glsl_initialize_derived_variables(shader);
+
+ delete state->symbols;
+ ralloc_free(state);
+ }
+
+ } /* extern "C" */
+ /**
+ * Do the set of common optimizations passes
+ *
+ * \param ir List of instructions to be optimized
+ * \param linked Is the shader linked? This enables
+ * optimizations passes that remove code at
+ * global scope and could cause linking to
+ * fail.
+ * \param uniform_locations_assigned Have locations already been assigned for
+ * uniforms? This prevents the declarations
+ * of unused uniforms from being removed.
+ * The setting of this flag only matters if
+ * \c linked is \c true.
+ * \param native_integers Whether the driver natively supports integer
+ * operations; this is passed through to the
+ * algebraic optimization pass.
+ * \param options The driver's preferred shader options.
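+ *
+ * Callers typically run this pass to a fixed point; for example, the
+ * compile path above does:
+ *
+ * while (do_common_optimization(shader->ir, false, false, options,
+ * ctx->Const.NativeIntegers))
+ * ;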
+ */
+ bool
+ do_common_optimization(exec_list *ir, bool linked,
+ bool uniform_locations_assigned,
+ const struct gl_shader_compiler_options *options,
+ bool native_integers)
+ {
+ GLboolean progress = GL_FALSE;
+
+ progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
+
+ if (linked) {
+ progress = do_function_inlining(ir) || progress;
+ progress = do_dead_functions(ir) || progress;
+ progress = do_structure_splitting(ir) || progress;
+ }
+ progress = do_if_simplification(ir) || progress;
+ progress = opt_flatten_nested_if_blocks(ir) || progress;
+ progress = opt_conditional_discard(ir) || progress;
+ progress = do_copy_propagation(ir) || progress;
+ progress = do_copy_propagation_elements(ir) || progress;
+
+ if (options->OptimizeForAOS && !linked)
+ progress = opt_flip_matrices(ir) || progress;
+
+ if (linked && options->OptimizeForAOS) {
+ progress = do_vectorize(ir) || progress;
+ }
+
+ if (linked)
+ progress = do_dead_code(ir, uniform_locations_assigned) || progress;
+ else
+ progress = do_dead_code_unlinked(ir) || progress;
+ progress = do_dead_code_local(ir) || progress;
+ progress = do_tree_grafting(ir) || progress;
+ progress = do_constant_propagation(ir) || progress;
+ if (linked)
+ progress = do_constant_variable(ir) || progress;
+ else
+ progress = do_constant_variable_unlinked(ir) || progress;
+ progress = do_constant_folding(ir) || progress;
+ progress = do_minmax_prune(ir) || progress;
+ progress = do_rebalance_tree(ir) || progress;
+ progress = do_algebraic(ir, native_integers, options) || progress;
+ progress = do_lower_jumps(ir) || progress;
+ progress = do_vec_index_to_swizzle(ir) || progress;
+ progress = lower_vector_insert(ir, false) || progress;
+ progress = do_swizzle_swizzle(ir) || progress;
+ progress = do_noop_swizzle(ir) || progress;
+
+ progress = optimize_split_arrays(ir, linked) || progress;
+ progress = optimize_redundant_jumps(ir) || progress;
+
+ loop_state *ls = analyze_loop_variables(ir);
+ if (ls->loop_found) {
+ progress = set_loop_controls(ir, ls) || progress;
+ progress = unroll_loops(ir, ls, options) || progress;
+ }
+ delete ls;
+
+ return progress;
+ }
+
+ extern "C" {
+
+ /**
+ * To be called at GL teardown time, this frees compiler datastructures.
+ *
+ * After calling this, any previously compiled shaders and shader
+ * programs would be invalid. So this should happen at approximately
+ * program exit.
+ */
+ void
+ _mesa_destroy_shader_compiler(void)
+ {
+ _mesa_destroy_shader_compiler_caches();
+
+ _mesa_glsl_release_types();
+ }
+
+ /**
+ * Releases compiler caches to trade off performance for memory.
+ *
+ * Intended to be used with glReleaseShaderCompiler().
+ */
+ void
+ _mesa_destroy_shader_compiler_caches(void)
+ {
+ _mesa_glsl_release_builtin_functions();
+ }
+
+ }
--- /dev/null
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+ #include <string.h>
+ #include "main/core.h" /* for MAX2 */
+ #include "ir.h"
+ #include "compiler/glsl_types.h"
+
+ ir_rvalue::ir_rvalue(enum ir_node_type t)
+ : ir_instruction(t)
+ {
+ this->type = glsl_type::error_type;
+ }
+
+ bool ir_rvalue::is_zero() const
+ {
+ return false;
+ }
+
+ bool ir_rvalue::is_one() const
+ {
+ return false;
+ }
+
+ bool ir_rvalue::is_negative_one() const
+ {
+ return false;
+ }
+
+ /**
+ * Modify the swizzle mask to move one component to another
+ *
+ * \param m IR swizzle to be modified
+ * \param from Component in the RHS that is to be swizzled
+ * \param to Desired swizzle location of \c from
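+ *
+ * For example, update_rhs_swizzle(m, 2, 0) records that the RHS's third
+ * component should land in the destination's x channel (m.x = 2).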
+ */
+ static void
+ update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to)
+ {
+ switch (to) {
+ case 0: m.x = from; break;
+ case 1: m.y = from; break;
+ case 2: m.z = from; break;
+ case 3: m.w = from; break;
+ default: assert(!"Should not get here.");
+ }
+ }
+
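+ /* Set the LHS of the assignment, peeling any swizzles off of it. For a
+ * swizzled LHS such as v.zy, the write mask is remapped onto the
+ * underlying variable's channels and the RHS is re-swizzled so that each
+ * of its components lands in the channel it actually writes.
+ */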
+ void
+ ir_assignment::set_lhs(ir_rvalue *lhs)
+ {
+ void *mem_ctx = this;
+ bool swizzled = false;
+
+ while (lhs != NULL) {
+ ir_swizzle *swiz = lhs->as_swizzle();
+
+ if (swiz == NULL)
+ break;
+
+ unsigned write_mask = 0;
+ ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
+
+ for (unsigned i = 0; i < swiz->mask.num_components; i++) {
+ unsigned c = 0;
+
+ switch (i) {
+ case 0: c = swiz->mask.x; break;
+ case 1: c = swiz->mask.y; break;
+ case 2: c = swiz->mask.z; break;
+ case 3: c = swiz->mask.w; break;
+ default: assert(!"Should not get here.");
+ }
+
+ write_mask |= (((this->write_mask >> i) & 1) << c);
+ update_rhs_swizzle(rhs_swiz, i, c);
+ rhs_swiz.num_components = swiz->val->type->vector_elements;
+ }
+
+ this->write_mask = write_mask;
+ lhs = swiz->val;
+
+ this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
+ swizzled = true;
+ }
+
+ if (swizzled) {
+ /* Now, RHS channels line up with the LHS writemask. Collapse it
+ * to just the channels that will be written.
+ */
+ ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
+ int rhs_chan = 0;
+ for (int i = 0; i < 4; i++) {
+ if (write_mask & (1 << i))
+ update_rhs_swizzle(rhs_swiz, i, rhs_chan++);
+ }
+ rhs_swiz.num_components = rhs_chan;
+ this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
+ }
+
+ assert((lhs == NULL) || lhs->as_dereference());
+
+ this->lhs = (ir_dereference *) lhs;
+ }
+
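+ /* Return the variable that is entirely written by this assignment, or
+ * NULL if only part of it (e.g. a subset of a vector's components) is
+ * written.
+ */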
+ ir_variable *
+ ir_assignment::whole_variable_written()
+ {
+ ir_variable *v = this->lhs->whole_variable_referenced();
+
+ if (v == NULL)
+ return NULL;
+
+ if (v->type->is_scalar())
+ return v;
+
+ if (v->type->is_vector()) {
+ const unsigned mask = (1U << v->type->vector_elements) - 1;
+
+ if (mask != this->write_mask)
+ return NULL;
+ }
+
+ /* Either all the vector components are assigned or the variable is some
+ * composite type (and the whole thing is assigned).
+ */
+ return v;
+ }
+
+ ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs,
+ ir_rvalue *condition, unsigned write_mask)
+ : ir_instruction(ir_type_assignment)
+ {
+ this->condition = condition;
+ this->rhs = rhs;
+ this->lhs = lhs;
+ this->write_mask = write_mask;
+
+ if (lhs->type->is_scalar() || lhs->type->is_vector()) {
+ int lhs_components = 0;
+ for (int i = 0; i < 4; i++) {
+ if (write_mask & (1 << i))
+ lhs_components++;
+ }
+
+ assert(lhs_components == this->rhs->type->vector_elements);
+ }
+ }
+
+ ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs,
+ ir_rvalue *condition)
+ : ir_instruction(ir_type_assignment)
+ {
+ this->condition = condition;
+ this->rhs = rhs;
+
+ /* If the RHS is a vector type, assume that all components of the vector
+ * type are being written to the LHS. The write mask comes from the RHS
+ * because we can have a case where the LHS is a vec4 and the RHS is a
+ * vec3. In that case, the assignment is:
+ *
+ * (assign (...) (xyz) (var_ref lhs) (var_ref rhs))
+ */
+ if (rhs->type->is_vector())
+ this->write_mask = (1U << rhs->type->vector_elements) - 1;
+ else if (rhs->type->is_scalar())
+ this->write_mask = 1;
+ else
+ this->write_mask = 0;
+
+ this->set_lhs(lhs);
+ }
+
+ ir_expression::ir_expression(int op, const struct glsl_type *type,
+ ir_rvalue *op0, ir_rvalue *op1,
+ ir_rvalue *op2, ir_rvalue *op3)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->type = type;
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = op1;
+ this->operands[2] = op2;
+ this->operands[3] = op3;
+ #ifndef NDEBUG
+ int num_operands = get_num_operands(this->operation);
+ for (int i = num_operands; i < 4; i++) {
+ assert(this->operands[i] == NULL);
+ }
+ #endif
+ }
+
+ ir_expression::ir_expression(int op, ir_rvalue *op0)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = NULL;
+ this->operands[2] = NULL;
+ this->operands[3] = NULL;
+
+ assert(op <= ir_last_unop);
+
+ switch (this->operation) {
+ case ir_unop_bit_not:
+ case ir_unop_logic_not:
+ case ir_unop_neg:
+ case ir_unop_abs:
+ case ir_unop_sign:
+ case ir_unop_rcp:
+ case ir_unop_rsq:
+ case ir_unop_sqrt:
+ case ir_unop_exp:
+ case ir_unop_log:
+ case ir_unop_exp2:
+ case ir_unop_log2:
+ case ir_unop_trunc:
+ case ir_unop_ceil:
+ case ir_unop_floor:
+ case ir_unop_fract:
+ case ir_unop_round_even:
+ case ir_unop_sin:
+ case ir_unop_cos:
+ case ir_unop_dFdx:
+ case ir_unop_dFdx_coarse:
+ case ir_unop_dFdx_fine:
+ case ir_unop_dFdy:
+ case ir_unop_dFdy_coarse:
+ case ir_unop_dFdy_fine:
+ case ir_unop_bitfield_reverse:
+ case ir_unop_interpolate_at_centroid:
+ case ir_unop_saturate:
+ this->type = op0->type;
+ break;
+
+ case ir_unop_f2i:
+ case ir_unop_b2i:
+ case ir_unop_u2i:
+ case ir_unop_d2i:
+ case ir_unop_bitcast_f2i:
+ case ir_unop_bit_count:
+ case ir_unop_find_msb:
+ case ir_unop_find_lsb:
+ case ir_unop_subroutine_to_int:
+ this->type = glsl_type::get_instance(GLSL_TYPE_INT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_b2f:
+ case ir_unop_i2f:
+ case ir_unop_u2f:
+ case ir_unop_d2f:
+ case ir_unop_bitcast_i2f:
+ case ir_unop_bitcast_u2f:
+ this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_f2b:
+ case ir_unop_i2b:
+ case ir_unop_d2b:
+ this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_f2d:
+ case ir_unop_i2d:
+ case ir_unop_u2d:
+ this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_i2u:
+ case ir_unop_f2u:
+ case ir_unop_d2u:
+ case ir_unop_bitcast_f2u:
+ this->type = glsl_type::get_instance(GLSL_TYPE_UINT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_noise:
- case ir_binop_pack_half_2x16_split:
- this->type = glsl_type::uint_type;
- break;
-
+ this->type = glsl_type::float_type;
+ break;
+
+ case ir_unop_unpack_double_2x32:
+ this->type = glsl_type::uvec2_type;
+ break;
+
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_pack_half_2x16:
+ this->type = glsl_type::uint_type;
+ break;
+
+ case ir_unop_pack_double_2x32:
+ this->type = glsl_type::double_type;
+ break;
+
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ this->type = glsl_type::vec2_type;
+ break;
+
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ this->type = glsl_type::vec4_type;
+ break;
+
+ case ir_unop_frexp_sig:
+ this->type = op0->type;
+ break;
+ case ir_unop_frexp_exp:
+ this->type = glsl_type::get_instance(GLSL_TYPE_INT,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_unop_get_buffer_size:
+ case ir_unop_ssbo_unsized_array_length:
+ this->type = glsl_type::int_type;
+ break;
+
+ default:
+ assert(!"not reached: missing automatic type setup for ir_expression");
+ this->type = op0->type;
+ break;
+ }
+ }
+
+ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = op1;
+ this->operands[2] = NULL;
+ this->operands[3] = NULL;
+
+ assert(op > ir_last_unop);
+
+ switch (this->operation) {
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ this->type = glsl_type::bool_type;
+ break;
+
+ case ir_binop_add:
+ case ir_binop_sub:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_pow:
+ case ir_binop_mul:
+ case ir_binop_div:
+ case ir_binop_mod:
+ if (op0->type->is_scalar()) {
+ this->type = op1->type;
+ } else if (op1->type->is_scalar()) {
+ this->type = op0->type;
+ } else {
+ if (this->operation == ir_binop_mul) {
+ this->type = glsl_type::get_mul_type(op0->type, op1->type);
+ } else {
+ assert(op0->type == op1->type);
+ this->type = op0->type;
+ }
+ }
+ break;
+
+ case ir_binop_logic_and:
+ case ir_binop_logic_xor:
+ case ir_binop_logic_or:
+ case ir_binop_bit_and:
+ case ir_binop_bit_xor:
+ case ir_binop_bit_or:
+ assert(!op0->type->is_matrix());
+ assert(!op1->type->is_matrix());
+ if (op0->type->is_scalar()) {
+ this->type = op1->type;
+ } else if (op1->type->is_scalar()) {
+ this->type = op0->type;
+ } else {
+ assert(op0->type->vector_elements == op1->type->vector_elements);
+ this->type = op0->type;
+ }
+ break;
+
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_greater:
+ assert(op0->type == op1->type);
+ this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
+ op0->type->vector_elements, 1);
+ break;
+
+ case ir_binop_dot:
+ this->type = op0->type->get_base_type();
+ break;
+
- "unpackHalf2x16_split_x",
- "unpackHalf2x16_split_y",
+ case ir_binop_imul_high:
+ case ir_binop_carry:
+ case ir_binop_borrow:
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ case ir_binop_ldexp:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample:
+ this->type = op0->type;
+ break;
+
+ case ir_binop_vector_extract:
+ this->type = op0->type->get_scalar_type();
+ break;
+
+ default:
+ assert(!"not reached: missing automatic type setup for ir_expression");
+ this->type = glsl_type::float_type;
+ }
+ }
+
+ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1,
+ ir_rvalue *op2)
+ : ir_rvalue(ir_type_expression)
+ {
+ this->operation = ir_expression_operation(op);
+ this->operands[0] = op0;
+ this->operands[1] = op1;
+ this->operands[2] = op2;
+ this->operands[3] = NULL;
+
+ assert(op > ir_last_binop && op <= ir_last_triop);
+
+ switch (this->operation) {
+ case ir_triop_fma:
+ case ir_triop_lrp:
+ case ir_triop_bitfield_extract:
+ case ir_triop_vector_insert:
+ this->type = op0->type;
+ break;
+
+ case ir_triop_csel:
+ this->type = op1->type;
+ break;
+
+ default:
+ assert(!"not reached: missing automatic type setup for ir_expression");
+ this->type = glsl_type::float_type;
+ }
+ }
+
+ unsigned int
+ ir_expression::get_num_operands(ir_expression_operation op)
+ {
+ assert(op <= ir_last_opcode);
+
+ if (op <= ir_last_unop)
+ return 1;
+
+ if (op <= ir_last_binop)
+ return 2;
+
+ if (op <= ir_last_triop)
+ return 3;
+
+ if (op <= ir_last_quadop)
+ return 4;
+
+ assert(false);
+ return 0;
+ }
+
+ static const char *const operator_strs[] = {
+ "~",
+ "!",
+ "neg",
+ "abs",
+ "sign",
+ "rcp",
+ "rsq",
+ "sqrt",
+ "exp",
+ "log",
+ "exp2",
+ "log2",
+ "f2i",
+ "f2u",
+ "i2f",
+ "f2b",
+ "b2f",
+ "i2b",
+ "b2i",
+ "u2f",
+ "i2u",
+ "u2i",
+ "d2f",
+ "f2d",
+ "d2i",
+ "i2d",
+ "d2u",
+ "u2d",
+ "d2b",
+ "bitcast_i2f",
+ "bitcast_f2i",
+ "bitcast_u2f",
+ "bitcast_f2u",
+ "trunc",
+ "ceil",
+ "floor",
+ "fract",
+ "round_even",
+ "sin",
+ "cos",
+ "dFdx",
+ "dFdxCoarse",
+ "dFdxFine",
+ "dFdy",
+ "dFdyCoarse",
+ "dFdyFine",
+ "packSnorm2x16",
+ "packSnorm4x8",
+ "packUnorm2x16",
+ "packUnorm4x8",
+ "packHalf2x16",
+ "unpackSnorm2x16",
+ "unpackSnorm4x8",
+ "unpackUnorm2x16",
+ "unpackUnorm4x8",
+ "unpackHalf2x16",
- "packHalf2x16_split",
+ "bitfield_reverse",
+ "bit_count",
+ "find_msb",
+ "find_lsb",
+ "sat",
+ "packDouble2x32",
+ "unpackDouble2x32",
+ "frexp_sig",
+ "frexp_exp",
+ "noise",
+ "subroutine_to_int",
+ "interpolate_at_centroid",
+ "get_buffer_size",
+ "ssbo_unsized_array_length",
+ "+",
+ "-",
+ "*",
+ "imul_high",
+ "/",
+ "carry",
+ "borrow",
+ "%",
+ "<",
+ ">",
+ "<=",
+ ">=",
+ "==",
+ "!=",
+ "all_equal",
+ "any_nequal",
+ "<<",
+ ">>",
+ "&",
+ "^",
+ "|",
+ "&&",
+ "^^",
+ "||",
+ "dot",
+ "min",
+ "max",
+ "pow",
+ "ubo_load",
+ "ldexp",
+ "vector_extract",
+ "interpolate_at_offset",
+ "interpolate_at_sample",
+ "fma",
+ "lrp",
+ "csel",
+ "bitfield_extract",
+ "vector_insert",
+ "bitfield_insert",
+ "vector",
+ };
+
+ const char *ir_expression::operator_string(ir_expression_operation op)
+ {
+ assert((unsigned int) op < ARRAY_SIZE(operator_strs));
+ assert(ARRAY_SIZE(operator_strs) == (ir_quadop_vector + 1));
+ return operator_strs[op];
+ }
+
+ const char *ir_expression::operator_string()
+ {
+ return operator_string(this->operation);
+ }
+
+ const char*
+ depth_layout_string(ir_depth_layout layout)
+ {
+ switch(layout) {
+ case ir_depth_layout_none: return "";
+ case ir_depth_layout_any: return "depth_any";
+ case ir_depth_layout_greater: return "depth_greater";
+ case ir_depth_layout_less: return "depth_less";
+ case ir_depth_layout_unchanged: return "depth_unchanged";
+
+ default:
+ assert(0);
+ return "";
+ }
+ }
+
+ ir_expression_operation
+ ir_expression::get_operator(const char *str)
+ {
+ const int operator_count = sizeof(operator_strs) / sizeof(operator_strs[0]);
+ for (int op = 0; op < operator_count; op++) {
+ if (strcmp(str, operator_strs[op]) == 0)
+ return (ir_expression_operation) op;
+ }
+ return (ir_expression_operation) -1;
+ }
+
+ ir_variable *
+ ir_expression::variable_referenced() const
+ {
+ switch (operation) {
+ case ir_binop_vector_extract:
+ case ir_triop_vector_insert:
+ /* We get these for things like a[0] where a is a vector type. In these
+ * cases we want variable_referenced() to return the actual vector
+ * variable this is wrapping.
+ */
+ return operands[0]->variable_referenced();
+ default:
+ return ir_rvalue::variable_referenced();
+ }
+ }
+
+ ir_constant::ir_constant()
+ : ir_rvalue(ir_type_constant)
+ {
+ }
+
+ ir_constant::ir_constant(const struct glsl_type *type,
+ const ir_constant_data *data)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert((type->base_type >= GLSL_TYPE_UINT)
+ && (type->base_type <= GLSL_TYPE_BOOL));
+
+ this->type = type;
+ memcpy(& this->value, data, sizeof(this->value));
+ }
+
+ ir_constant::ir_constant(float f, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.f[i] = f;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.f[i] = 0;
+ }
+ }
+
+ ir_constant::ir_constant(double d, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.d[i] = d;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.d[i] = 0.0;
+ }
+ }
+
+ ir_constant::ir_constant(unsigned int u, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.u[i] = u;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.u[i] = 0;
+ }
+ }
+
+ ir_constant::ir_constant(int integer, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.i[i] = integer;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.i[i] = 0;
+ }
+ }
+
+ ir_constant::ir_constant(bool b, unsigned vector_elements)
+ : ir_rvalue(ir_type_constant)
+ {
+ assert(vector_elements <= 4);
+ this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1);
+ for (unsigned i = 0; i < vector_elements; i++) {
+ this->value.b[i] = b;
+ }
+ for (unsigned i = vector_elements; i < 16; i++) {
+ this->value.b[i] = false;
+ }
+ }
+
+ ir_constant::ir_constant(const ir_constant *c, unsigned i)
+ : ir_rvalue(ir_type_constant)
+ {
+ this->type = c->type->get_base_type();
+
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break;
+ case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break;
+ case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break;
+ case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break;
+ case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break;
+ default: assert(!"Should not get here."); break;
+ }
+ }
+
+ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
+ : ir_rvalue(ir_type_constant)
+ {
+ this->type = type;
+
+ assert(type->is_scalar() || type->is_vector() || type->is_matrix()
+ || type->is_record() || type->is_array());
+
+ if (type->is_array()) {
+ this->array_elements = ralloc_array(this, ir_constant *, type->length);
+ unsigned i = 0;
+ foreach_in_list(ir_constant, value, value_list) {
+ assert(value->as_constant() != NULL);
+
+ this->array_elements[i++] = value;
+ }
+ return;
+ }
+
+ /* If the constant is a record, the types of each of the entries in
+ * value_list must be a 1-for-1 match with the structure components. Each
+ * entry must also be a constant. Just move the nodes from the value_list
+ * to the list in the ir_constant.
+ */
+ /* FINISHME: Should there be some type checking and / or assertions here? */
+ /* FINISHME: Should the new constant take ownership of the nodes from
+ * FINISHME: value_list, or should it make copies?
+ */
+ if (type->is_record()) {
+ value_list->move_nodes_to(& this->components);
+ return;
+ }
+
+ for (unsigned i = 0; i < 16; i++) {
+ this->value.u[i] = 0;
+ }
+
+ ir_constant *value = (ir_constant *) (value_list->head);
+
+ /* Constructors with exactly one scalar argument are special for vectors
+ * and matrices. For vectors, the scalar value is replicated to fill all
+ * the components. For matrices, the scalar fills the components of the
+ * diagonal while the rest is filled with 0.
+ */
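+ /* For example, under these rules the GLSL constructors vec4(2.0) and
+ * mat3(1.0) yield, respectively, a vector with every component equal to
+ * 2.0 and a 3x3 identity matrix.
+ */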
+ if (value->type->is_scalar() && value->next->is_tail_sentinel()) {
+ if (type->is_matrix()) {
+ /* Matrix - fill diagonal (rest is already set to 0) */
+ assert(type->base_type == GLSL_TYPE_FLOAT ||
+ type->base_type == GLSL_TYPE_DOUBLE);
+ for (unsigned i = 0; i < type->matrix_columns; i++) {
+ if (type->base_type == GLSL_TYPE_FLOAT)
+ this->value.f[i * type->vector_elements + i] =
+ value->value.f[0];
+ else
+ this->value.d[i * type->vector_elements + i] =
+ value->value.d[0];
+ }
+ } else {
+ /* Vector or scalar - fill all components */
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.u[i] = value->value.u[0];
+ break;
+ case GLSL_TYPE_FLOAT:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.f[i] = value->value.f[0];
+ break;
+ case GLSL_TYPE_DOUBLE:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.d[i] = value->value.d[0];
+ break;
+ case GLSL_TYPE_BOOL:
+ for (unsigned i = 0; i < type->components(); i++)
+ this->value.b[i] = value->value.b[0];
+ break;
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+ return;
+ }
+
+ if (type->is_matrix() && value->type->is_matrix()) {
+ assert(value->next->is_tail_sentinel());
+
+ /* From section 5.4.2 of the GLSL 1.20 spec:
+ * "If a matrix is constructed from a matrix, then each component
+ * (column i, row j) in the result that has a corresponding component
+ * (column i, row j) in the argument will be initialized from there."
+ */
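+ /* For example, constructing a mat4 from a mat2 copies the 2x2 upper-left
+ * corner from the argument; the remaining columns get 1.0 on the diagonal
+ * and 0.0 elsewhere, matching the loops below.
+ */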
+ unsigned cols = MIN2(type->matrix_columns, value->type->matrix_columns);
+ unsigned rows = MIN2(type->vector_elements, value->type->vector_elements);
+ for (unsigned i = 0; i < cols; i++) {
+ for (unsigned j = 0; j < rows; j++) {
+ const unsigned src = i * value->type->vector_elements + j;
+ const unsigned dst = i * type->vector_elements + j;
+ this->value.f[dst] = value->value.f[src];
+ }
+ }
+
+ /* "All other components will be initialized to the identity matrix." */
+ for (unsigned i = cols; i < type->matrix_columns; i++)
+ this->value.f[i * type->vector_elements + i] = 1.0;
+
+ return;
+ }
+
+ /* Use each component from each entry in the value_list to initialize one
+ * component of the constant being constructed.
+ */
+ for (unsigned i = 0; i < type->components(); /* empty */) {
+ assert(value->as_constant() != NULL);
+ assert(!value->is_tail_sentinel());
+
+ for (unsigned j = 0; j < value->type->components(); j++) {
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ this->value.u[i] = value->get_uint_component(j);
+ break;
+ case GLSL_TYPE_INT:
+ this->value.i[i] = value->get_int_component(j);
+ break;
+ case GLSL_TYPE_FLOAT:
+ this->value.f[i] = value->get_float_component(j);
+ break;
+ case GLSL_TYPE_BOOL:
+ this->value.b[i] = value->get_bool_component(j);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ this->value.d[i] = value->get_double_component(j);
+ break;
+ default:
+ /* FINISHME: What to do? Exceptions are not the answer.
+ */
+ break;
+ }
+
+ i++;
+ if (i >= type->components())
+ break;
+ }
+
+ value = (ir_constant *) value->next;
+ }
+ }
+
+ ir_constant *
+ ir_constant::zero(void *mem_ctx, const glsl_type *type)
+ {
+ assert(type->is_scalar() || type->is_vector() || type->is_matrix()
+ || type->is_record() || type->is_array());
+
+ ir_constant *c = new(mem_ctx) ir_constant;
+ c->type = type;
+ memset(&c->value, 0, sizeof(c->value));
+
+ if (type->is_array()) {
+ c->array_elements = ralloc_array(c, ir_constant *, type->length);
+
+ for (unsigned i = 0; i < type->length; i++)
+ c->array_elements[i] = ir_constant::zero(c, type->fields.array);
+ }
+
+ if (type->is_record()) {
+ for (unsigned i = 0; i < type->length; i++) {
+ ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type);
+ c->components.push_tail(comp);
+ }
+ }
+
+ return c;
+ }
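+ /* For example, ir_constant::zero(mem_ctx, glsl_type::vec4_type) builds a
+ * vec4 constant with every component set to 0.0; for array and struct
+ * types it recursively builds a zero constant for each element or field.
+ * (mem_ctx is a placeholder for whatever ralloc context owns the result.)
+ */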
+
+ bool
+ ir_constant::get_bool_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return this->value.u[i] != 0;
+ case GLSL_TYPE_INT: return this->value.i[i] != 0;
+ case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
+ case GLSL_TYPE_BOOL: return this->value.b[i];
+ case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return false;
+ }
+
+ float
+ ir_constant::get_float_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return (float) this->value.u[i];
+ case GLSL_TYPE_INT: return (float) this->value.i[i];
+ case GLSL_TYPE_FLOAT: return this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f;
+ case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0.0;
+ }
+
+ double
+ ir_constant::get_double_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return (double) this->value.u[i];
+ case GLSL_TYPE_INT: return (double) this->value.i[i];
+ case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0;
+ case GLSL_TYPE_DOUBLE: return this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0.0;
+ }
+
+ int
+ ir_constant::get_int_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return this->value.u[i];
+ case GLSL_TYPE_INT: return this->value.i[i];
+ case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
+ case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0;
+ }
+
+ unsigned
+ ir_constant::get_uint_component(unsigned i) const
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT: return this->value.u[i];
+ case GLSL_TYPE_INT: return this->value.i[i];
+ case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
+ case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
+ case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
+ default: assert(!"Should not get here."); break;
+ }
+
+ /* Must return something to make the compiler happy. This is clearly an
+ * error case.
+ */
+ return 0;
+ }
+
+ ir_constant *
+ ir_constant::get_array_element(unsigned i) const
+ {
+ assert(this->type->is_array());
+
+ /* From page 35 (page 41 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "Behavior is undefined if a shader subscripts an array with an index
+ * less than 0 or greater than or equal to the size the array was
+ * declared with."
+ *
+ * Most out-of-bounds accesses are removed before execution gets this far,
+ * but a non-constant array index can still be constant-folded into an
+ * out-of-range value, so clamp the index to the valid range here.
+ */
+ if (int(i) < 0)
+ i = 0;
+ else if (i >= this->type->length)
+ i = this->type->length - 1;
+
+ return array_elements[i];
+ }
+
+ ir_constant *
+ ir_constant::get_record_field(const char *name)
+ {
+ int idx = this->type->field_index(name);
+
+ if (idx < 0)
+ return NULL;
+
+ if (this->components.is_empty())
+ return NULL;
+
+ exec_node *node = this->components.head;
+ for (int i = 0; i < idx; i++) {
+ node = node->next;
+
+ /* If the end of the list is encountered before the element matching the
+ * requested field is found, return NULL.
+ */
+ if (node->is_tail_sentinel())
+ return NULL;
+ }
+
+ return (ir_constant *) node;
+ }
+
+ void
+ ir_constant::copy_offset(ir_constant *src, int offset)
+ {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL: {
+ unsigned int size = src->type->components();
+ assert (size <= this->type->components() - offset);
+ for (unsigned int i=0; i<size; i++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ value.u[i+offset] = src->get_uint_component(i);
+ break;
+ case GLSL_TYPE_INT:
+ value.i[i+offset] = src->get_int_component(i);
+ break;
+ case GLSL_TYPE_FLOAT:
+ value.f[i+offset] = src->get_float_component(i);
+ break;
+ case GLSL_TYPE_BOOL:
+ value.b[i+offset] = src->get_bool_component(i);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ value.d[i+offset] = src->get_double_component(i);
+ break;
+ default: // Shut up the compiler
+ break;
+ }
+ }
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ assert (src->type == this->type);
+ this->components.make_empty();
+ foreach_in_list(ir_constant, orig, &src->components) {
+ this->components.push_tail(orig->clone(this, NULL));
+ }
+ break;
+ }
+
+ case GLSL_TYPE_ARRAY: {
+ assert (src->type == this->type);
+ for (unsigned i = 0; i < this->type->length; i++) {
+ this->array_elements[i] = src->array_elements[i]->clone(this, NULL);
+ }
+ break;
+ }
+
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+
+ void
+ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
+ {
+ assert (!type->is_array() && !type->is_record());
+
+ if (!type->is_vector() && !type->is_matrix()) {
+ offset = 0;
+ mask = 1;
+ }
+
+ int id = 0;
+ for (int i=0; i<4; i++) {
+ if (mask & (1 << i)) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ value.u[i+offset] = src->get_uint_component(id++);
+ break;
+ case GLSL_TYPE_INT:
+ value.i[i+offset] = src->get_int_component(id++);
+ break;
+ case GLSL_TYPE_FLOAT:
+ value.f[i+offset] = src->get_float_component(id++);
+ break;
+ case GLSL_TYPE_BOOL:
+ value.b[i+offset] = src->get_bool_component(id++);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ value.d[i+offset] = src->get_double_component(id++);
+ break;
+ default:
+ assert(!"Should not get here.");
+ return;
+ }
+ }
+ }
+ }
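+ /* Usage sketch: the mask is a component bitfield, so a call such as
+ *
+ *    dst->copy_masked_offset(src, 0, 0x5);
+ *
+ * copies src components 0 and 1 into dst components 0 and 2 (bits 0 and 2
+ * of the mask). dst and src are placeholders for two vector ir_constants.
+ */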
+
+ bool
+ ir_constant::has_value(const ir_constant *c) const
+ {
+ if (this->type != c->type)
+ return false;
+
+ if (this->type->is_array()) {
+ for (unsigned i = 0; i < this->type->length; i++) {
+ if (!this->array_elements[i]->has_value(c->array_elements[i]))
+ return false;
+ }
+ return true;
+ }
+
+ if (this->type->base_type == GLSL_TYPE_STRUCT) {
+ const exec_node *a_node = this->components.head;
+ const exec_node *b_node = c->components.head;
+
+ while (!a_node->is_tail_sentinel()) {
+ assert(!b_node->is_tail_sentinel());
+
+ const ir_constant *const a_field = (ir_constant *) a_node;
+ const ir_constant *const b_field = (ir_constant *) b_node;
+
+ if (!a_field->has_value(b_field))
+ return false;
+
+ a_node = a_node->next;
+ b_node = b_node->next;
+ }
+
+ return true;
+ }
+
+ for (unsigned i = 0; i < this->type->components(); i++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ if (this->value.u[i] != c->value.u[i])
+ return false;
+ break;
+ case GLSL_TYPE_INT:
+ if (this->value.i[i] != c->value.i[i])
+ return false;
+ break;
+ case GLSL_TYPE_FLOAT:
+ if (this->value.f[i] != c->value.f[i])
+ return false;
+ break;
+ case GLSL_TYPE_BOOL:
+ if (this->value.b[i] != c->value.b[i])
+ return false;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ if (this->value.d[i] != c->value.d[i])
+ return false;
+ break;
+ default:
+ assert(!"Should not get here.");
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool
+ ir_constant::is_value(float f, int i) const
+ {
+ if (!this->type->is_scalar() && !this->type->is_vector())
+ return false;
+
+ /* Only accept boolean values for 0/1. */
+ if (int(bool(i)) != i && this->type->is_boolean())
+ return false;
+
+ for (unsigned c = 0; c < this->type->vector_elements; c++) {
+ switch (this->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ if (this->value.f[c] != f)
+ return false;
+ break;
+ case GLSL_TYPE_INT:
+ if (this->value.i[c] != i)
+ return false;
+ break;
+ case GLSL_TYPE_UINT:
+ if (this->value.u[c] != unsigned(i))
+ return false;
+ break;
+ case GLSL_TYPE_BOOL:
+ if (this->value.b[c] != bool(i))
+ return false;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ if (this->value.d[c] != double(f))
+ return false;
+ break;
+ default:
+ /* The only other base types are structures, arrays, and samplers.
+ * Samplers cannot be constants, and the others should have been
+ * filtered out above.
+ */
+ assert(!"Should not get here.");
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool
+ ir_constant::is_zero() const
+ {
+ return is_value(0.0, 0);
+ }
+
+ bool
+ ir_constant::is_one() const
+ {
+ return is_value(1.0, 1);
+ }
+
+ bool
+ ir_constant::is_negative_one() const
+ {
+ return is_value(-1.0, -1);
+ }
+
+ bool
+ ir_constant::is_uint16_constant() const
+ {
+ if (!type->is_integer())
+ return false;
+
+ return value.u[0] < (1 << 16);
+ }
+
+ ir_loop::ir_loop()
+ : ir_instruction(ir_type_loop)
+ {
+ }
+
+
+ ir_dereference_variable::ir_dereference_variable(ir_variable *var)
+ : ir_dereference(ir_type_dereference_variable)
+ {
+ assert(var != NULL);
+
+ this->var = var;
+ this->type = var->type;
+ }
+
+
+ ir_dereference_array::ir_dereference_array(ir_rvalue *value,
+ ir_rvalue *array_index)
+ : ir_dereference(ir_type_dereference_array)
+ {
+ this->array_index = array_index;
+ this->set_array(value);
+ }
+
+
+ ir_dereference_array::ir_dereference_array(ir_variable *var,
+ ir_rvalue *array_index)
+ : ir_dereference(ir_type_dereference_array)
+ {
+ void *ctx = ralloc_parent(var);
+
+ this->array_index = array_index;
+ this->set_array(new(ctx) ir_dereference_variable(var));
+ }
+
+
+ void
+ ir_dereference_array::set_array(ir_rvalue *value)
+ {
+ assert(value != NULL);
+
+ this->array = value;
+
+ const glsl_type *const vt = this->array->type;
+
+ if (vt->is_array()) {
+ type = vt->fields.array;
+ } else if (vt->is_matrix()) {
+ type = vt->column_type();
+ } else if (vt->is_vector()) {
+ type = vt->get_base_type();
+ }
+ }
+
+
+ ir_dereference_record::ir_dereference_record(ir_rvalue *value,
+ const char *field)
+ : ir_dereference(ir_type_dereference_record)
+ {
+ assert(value != NULL);
+
+ this->record = value;
+ this->field = ralloc_strdup(this, field);
+ this->type = this->record->type->field_type(field);
+ }
+
+
+ ir_dereference_record::ir_dereference_record(ir_variable *var,
+ const char *field)
+ : ir_dereference(ir_type_dereference_record)
+ {
+ void *ctx = ralloc_parent(var);
+
+ this->record = new(ctx) ir_dereference_variable(var);
+ this->field = ralloc_strdup(this, field);
+ this->type = this->record->type->field_type(field);
+ }
+
+ bool
+ ir_dereference::is_lvalue() const
+ {
+ ir_variable *var = this->variable_referenced();
+
+ /* Every l-value dereference chain eventually ends in a variable.
+ */
+ if ((var == NULL) || var->data.read_only)
+ return false;
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables cannot be treated as l-values; hence cannot
+ * be used as out or inout function parameters, nor can they be
+ * assigned into."
+ */
+ if (this->type->contains_opaque())
+ return false;
+
+ return true;
+ }
+
+
+ static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" };
+
+ const char *ir_texture::opcode_string()
+ {
+ assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
+ return tex_opcode_strs[op];
+ }
+
+ ir_texture_opcode
+ ir_texture::get_opcode(const char *str)
+ {
+ const int count = sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]);
+ for (int op = 0; op < count; op++) {
+ if (strcmp(str, tex_opcode_strs[op]) == 0)
+ return (ir_texture_opcode) op;
+ }
+ return (ir_texture_opcode) -1;
+ }
+
+
+ void
+ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
+ {
+ assert(sampler != NULL);
+ assert(type != NULL);
+ this->sampler = sampler;
+ this->type = type;
+
+ if (this->op == ir_txs || this->op == ir_query_levels ||
+ this->op == ir_texture_samples) {
+ assert(type->base_type == GLSL_TYPE_INT);
+ } else if (this->op == ir_lod) {
+ assert(type->vector_elements == 2);
+ assert(type->base_type == GLSL_TYPE_FLOAT);
+ } else if (this->op == ir_samples_identical) {
+ assert(type == glsl_type::bool_type);
+ assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
+ assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
+ } else {
+ assert(sampler->type->sampler_type == (int) type->base_type);
+ if (sampler->type->sampler_shadow)
+ assert(type->vector_elements == 4 || type->vector_elements == 1);
+ else
+ assert(type->vector_elements == 4);
+ }
+ }
+
+
+ void
+ ir_swizzle::init_mask(const unsigned *comp, unsigned count)
+ {
+ assert((count >= 1) && (count <= 4));
+
+ memset(&this->mask, 0, sizeof(this->mask));
+ this->mask.num_components = count;
+
+ unsigned dup_mask = 0;
+ switch (count) {
+ case 4:
+ assert(comp[3] <= 3);
+ dup_mask |= (1U << comp[3])
+ & ((1U << comp[0]) | (1U << comp[1]) | (1U << comp[2]));
+ this->mask.w = comp[3];
+
+ case 3:
+ assert(comp[2] <= 3);
+ dup_mask |= (1U << comp[2])
+ & ((1U << comp[0]) | (1U << comp[1]));
+ this->mask.z = comp[2];
+
+ case 2:
+ assert(comp[1] <= 3);
+ dup_mask |= (1U << comp[1])
+ & ((1U << comp[0]));
+ this->mask.y = comp[1];
+
+ case 1:
+ assert(comp[0] <= 3);
+ this->mask.x = comp[0];
+ }
+
+ this->mask.has_duplicates = dup_mask != 0;
+
+ /* Based on the number of elements in the swizzle and the base type
+ * (i.e., float, int, unsigned, or bool) of the vector being swizzled,
+ * generate the type of the resulting value.
+ */
+ type = glsl_type::get_instance(val->type->base_type, mask.num_components, 1);
+ }
+
+ ir_swizzle::ir_swizzle(ir_rvalue *val, unsigned x, unsigned y, unsigned z,
+ unsigned w, unsigned count)
+ : ir_rvalue(ir_type_swizzle), val(val)
+ {
+ const unsigned components[4] = { x, y, z, w };
+ this->init_mask(components, count);
+ }
+
+ ir_swizzle::ir_swizzle(ir_rvalue *val, const unsigned *comp,
+ unsigned count)
+ : ir_rvalue(ir_type_swizzle), val(val)
+ {
+ this->init_mask(comp, count);
+ }
+
+ ir_swizzle::ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask)
+ : ir_rvalue(ir_type_swizzle)
+ {
+ this->val = val;
+ this->mask = mask;
+ this->type = glsl_type::get_instance(val->type->base_type,
+ mask.num_components, 1);
+ }
+
+ #define X 1
+ #define R 5
+ #define S 9
+ #define I 13
+
+ ir_swizzle *
+ ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length)
+ {
+ void *ctx = ralloc_parent(val);
+
+ /* For each possible swizzle character, this table encodes the value in
+ * \c idx_map that represents the 0th element of the vector. For invalid
+ * swizzle characters (e.g., 'k'), a special value is used that will allow
+ * detection of errors.
+ */
+ static const unsigned char base_idx[26] = {
+ /* a b c d e f g h i j k l m */
+ R, R, I, I, I, I, R, I, I, I, I, I, I,
+ /* n o p q r s t u v w x y z */
+ I, I, S, S, R, S, S, I, I, X, X, X, X
+ };
+
+ /* Each valid swizzle character has an entry in the previous table. This
+ * table encodes the base index encoded in the previous table plus the actual
+ * index of the swizzle character. When processing swizzles, the first
+ * character in the string is indexed in the previous table. Each character
+ * in the string is indexed in this table, and the value found there has the
+ * value from the first table subtracted. The result must be in the range
+ * [0,3].
+ *
+ * For example, the string "wzyx" will get X from the first table. Each of
+ * the characters will get X+3, X+2, X+1, and X+0 from this table. After
+ * subtraction, the swizzle values are { 3, 2, 1, 0 }.
+ *
+ * The string "wzrg" will get X from the first table. Each of the characters
+ * will get X+3, X+2, R+0, and R+1 from this table. After subtraction, the
+ * swizzle values are { 3, 2, 4, 5 }. Since 4 and 5 are outside the range
+ * [0,3], the error is detected.
+ */
+ static const unsigned char idx_map[26] = {
+ /* a b c d e f g h i j k l m */
+ R+3, R+2, 0, 0, 0, 0, R+1, 0, 0, 0, 0, 0, 0,
+ /* n o p q r s t u v w x y z */
+ 0, 0, S+2, S+3, R+0, S+0, S+1, 0, 0, X+3, X+0, X+1, X+2
+ };
+
+ int swiz_idx[4] = { 0, 0, 0, 0 };
+ unsigned i;
+
+
+ /* Validate the first character in the swizzle string and look up the base
+ * index value as described above.
+ */
+ if ((str[0] < 'a') || (str[0] > 'z'))
+ return NULL;
+
+ const unsigned base = base_idx[str[0] - 'a'];
+
+
+ for (i = 0; (i < 4) && (str[i] != '\0'); i++) {
+ /* Validate the next character, and, as described above, convert it to a
+ * swizzle index.
+ */
+ if ((str[i] < 'a') || (str[i] > 'z'))
+ return NULL;
+
+ swiz_idx[i] = idx_map[str[i] - 'a'] - base;
+ if ((swiz_idx[i] < 0) || (swiz_idx[i] >= (int) vector_length))
+ return NULL;
+ }
+
+ if (str[i] != '\0')
+ return NULL;
+
+ return new(ctx) ir_swizzle(val, swiz_idx[0], swiz_idx[1], swiz_idx[2],
+ swiz_idx[3], i);
+ }
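+ /* For example, ir_swizzle::create(val, "wzyx", 4) yields the reversal
+ * swizzle { 3, 2, 1, 0 }, while a string that mixes naming sets or indexes
+ * past the vector length (e.g. "wzrg", or "z" on a vec2) returns NULL, as
+ * described in the tables above. val stands for the r-value being swizzled.
+ */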
+
+ #undef X
+ #undef R
+ #undef S
+ #undef I
+
+ ir_variable *
+ ir_swizzle::variable_referenced() const
+ {
+ return this->val->variable_referenced();
+ }
+
+
+ bool ir_variable::temporaries_allocate_names = false;
+
+ const char ir_variable::tmp_name[] = "compiler_temp";
+
+ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
+ ir_variable_mode mode)
+ : ir_instruction(ir_type_variable)
+ {
+ this->type = type;
+
+ if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names)
+ name = NULL;
+
+ /* The ir_variable clone method may call this constructor with name set to
+ * tmp_name.
+ */
+ assert(name != NULL
+ || mode == ir_var_temporary
+ || mode == ir_var_function_in
+ || mode == ir_var_function_out
+ || mode == ir_var_function_inout);
+ assert(name != ir_variable::tmp_name
+ || mode == ir_var_temporary);
+ if (mode == ir_var_temporary
+ && (name == NULL || name == ir_variable::tmp_name)) {
+ this->name = ir_variable::tmp_name;
+ } else {
+ this->name = ralloc_strdup(this, name);
+ }
+
+ this->u.max_ifc_array_access = NULL;
+
+ this->data.explicit_location = false;
+ this->data.has_initializer = false;
+ this->data.location = -1;
+ this->data.location_frac = 0;
+ this->data.binding = 0;
+ this->data.warn_extension_index = 0;
+ this->constant_value = NULL;
+ this->constant_initializer = NULL;
+ this->data.origin_upper_left = false;
+ this->data.pixel_center_integer = false;
+ this->data.depth_layout = ir_depth_layout_none;
+ this->data.used = false;
+ this->data.always_active_io = false;
+ this->data.read_only = false;
+ this->data.centroid = false;
+ this->data.sample = false;
+ this->data.patch = false;
+ this->data.invariant = false;
+ this->data.how_declared = ir_var_declared_normally;
+ this->data.mode = mode;
+ this->data.interpolation = INTERP_QUALIFIER_NONE;
+ this->data.max_array_access = 0;
+ this->data.offset = 0;
+ this->data.precision = GLSL_PRECISION_NONE;
+ this->data.image_read_only = false;
+ this->data.image_write_only = false;
+ this->data.image_coherent = false;
+ this->data.image_volatile = false;
+ this->data.image_restrict = false;
+ this->data.from_ssbo_unsized_array = false;
+
+ if (type != NULL) {
+ if (type->base_type == GLSL_TYPE_SAMPLER)
+ this->data.read_only = true;
+
+ if (type->is_interface())
+ this->init_interface_type(type);
+ else if (type->without_array()->is_interface())
+ this->init_interface_type(type->without_array());
+ }
+ }
+
+
+ const char *
+ interpolation_string(unsigned interpolation)
+ {
+ switch (interpolation) {
+ case INTERP_QUALIFIER_NONE: return "no";
+ case INTERP_QUALIFIER_SMOOTH: return "smooth";
+ case INTERP_QUALIFIER_FLAT: return "flat";
+ case INTERP_QUALIFIER_NOPERSPECTIVE: return "noperspective";
+ }
+
+ assert(!"Should not get here.");
+ return "";
+ }
+
+
+ glsl_interp_qualifier
+ ir_variable::determine_interpolation_mode(bool flat_shade)
+ {
+ if (this->data.interpolation != INTERP_QUALIFIER_NONE)
+ return (glsl_interp_qualifier) this->data.interpolation;
+ int location = this->data.location;
+ bool is_gl_Color =
+ location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
+ if (flat_shade && is_gl_Color)
+ return INTERP_QUALIFIER_FLAT;
+ else
+ return INTERP_QUALIFIER_SMOOTH;
+ }
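+ /* For example, an undecorated gl_Color input resolves to
+ * INTERP_QUALIFIER_FLAT when flat_shade is true and to
+ * INTERP_QUALIFIER_SMOOTH otherwise; an explicit interpolation qualifier
+ * on the variable always takes precedence.
+ */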
+
+ const char *const ir_variable::warn_extension_table[] = {
+ "",
+ "GL_ARB_shader_stencil_export",
+ "GL_AMD_shader_stencil_export",
+ };
+
+ void
+ ir_variable::enable_extension_warning(const char *extension)
+ {
+ for (unsigned i = 0; i < ARRAY_SIZE(warn_extension_table); i++) {
+ if (strcmp(warn_extension_table[i], extension) == 0) {
+ this->data.warn_extension_index = i;
+ return;
+ }
+ }
+
+ assert(!"Should not get here.");
+ this->data.warn_extension_index = 0;
+ }
+
+ const char *
+ ir_variable::get_extension_warning() const
+ {
+ return this->data.warn_extension_index == 0
+ ? NULL : warn_extension_table[this->data.warn_extension_index];
+ }
+
+ ir_function_signature::ir_function_signature(const glsl_type *return_type,
+ builtin_available_predicate b)
+ : ir_instruction(ir_type_function_signature),
+ return_type(return_type), is_defined(false), is_intrinsic(false),
+ builtin_avail(b), _function(NULL)
+ {
+ this->origin = NULL;
+ }
+
+
+ bool
+ ir_function_signature::is_builtin() const
+ {
+ return builtin_avail != NULL;
+ }
+
+
+ bool
+ ir_function_signature::is_builtin_available(const _mesa_glsl_parse_state *state) const
+ {
+ /* We can't call the predicate without a state pointer, so just say that
+ * the signature is available. At compile time, we need the filtering,
+ * but also receive a valid state pointer. At link time, we're resolving
+ * imported built-in prototypes to their definitions, which will always
+ * be an exact match. So we can skip the filtering.
+ */
+ if (state == NULL)
+ return true;
+
+ assert(builtin_avail != NULL);
+ return builtin_avail(state);
+ }
+
+
+ static bool
+ modes_match(unsigned a, unsigned b)
+ {
+ if (a == b)
+ return true;
+
+ /* Accept "in" vs. "const in" */
+ if ((a == ir_var_const_in && b == ir_var_function_in) ||
+ (b == ir_var_const_in && a == ir_var_function_in))
+ return true;
+
+ return false;
+ }
+
+
+ const char *
+ ir_function_signature::qualifiers_match(exec_list *params)
+ {
+ /* check that the qualifiers match. */
+ foreach_two_lists(a_node, &this->parameters, b_node, params) {
+ ir_variable *a = (ir_variable *) a_node;
+ ir_variable *b = (ir_variable *) b_node;
+
+ if (a->data.read_only != b->data.read_only ||
+ !modes_match(a->data.mode, b->data.mode) ||
+ a->data.interpolation != b->data.interpolation ||
+ a->data.centroid != b->data.centroid ||
+ a->data.sample != b->data.sample ||
+ a->data.patch != b->data.patch ||
+ a->data.image_read_only != b->data.image_read_only ||
+ a->data.image_write_only != b->data.image_write_only ||
+ a->data.image_coherent != b->data.image_coherent ||
+ a->data.image_volatile != b->data.image_volatile ||
+ a->data.image_restrict != b->data.image_restrict) {
+
+ /* parameter a's qualifiers don't match */
+ return a->name;
+ }
+ }
+ return NULL;
+ }
+
+
+ void
+ ir_function_signature::replace_parameters(exec_list *new_params)
+ {
+ /* Destroy all of the previous parameter information. If the previous
+ * parameter information comes from the function prototype, it may either
+ * specify incorrect parameter names or not have names at all.
+ */
+ new_params->move_nodes_to(&parameters);
+ }
+
+
+ ir_function::ir_function(const char *name)
+ : ir_instruction(ir_type_function)
+ {
+ this->subroutine_index = -1;
+ this->name = ralloc_strdup(this, name);
+ }
+
+
+ bool
+ ir_function::has_user_signature()
+ {
+ foreach_in_list(ir_function_signature, sig, &this->signatures) {
+ if (!sig->is_builtin())
+ return true;
+ }
+ return false;
+ }
+
+
+ ir_rvalue *
+ ir_rvalue::error_value(void *mem_ctx)
+ {
+ ir_rvalue *v = new(mem_ctx) ir_rvalue(ir_type_unset);
+
+ v->type = glsl_type::error_type;
+ return v;
+ }
+
+
+ void
+ visit_exec_list(exec_list *list, ir_visitor *visitor)
+ {
+ foreach_in_list_safe(ir_instruction, node, list) {
+ node->accept(visitor);
+ }
+ }
+
+
+ static void
+ steal_memory(ir_instruction *ir, void *new_ctx)
+ {
+ ir_variable *var = ir->as_variable();
+ ir_function *fn = ir->as_function();
+ ir_constant *constant = ir->as_constant();
+ if (var != NULL && var->constant_value != NULL)
+ steal_memory(var->constant_value, ir);
+
+ if (var != NULL && var->constant_initializer != NULL)
+ steal_memory(var->constant_initializer, ir);
+
+ if (fn != NULL && fn->subroutine_types)
+ ralloc_steal(new_ctx, fn->subroutine_types);
+
+ /* The components of aggregate constants are not visited by the normal
+ * visitor, so steal their values by hand.
+ */
+ if (constant != NULL) {
+ if (constant->type->is_record()) {
+ foreach_in_list(ir_constant, field, &constant->components) {
+ steal_memory(field, ir);
+ }
+ } else if (constant->type->is_array()) {
+ for (unsigned int i = 0; i < constant->type->length; i++) {
+ steal_memory(constant->array_elements[i], ir);
+ }
+ }
+ }
+
+ ralloc_steal(new_ctx, ir);
+ }
+
+
+ void
+ reparent_ir(exec_list *list, void *mem_ctx)
+ {
+ foreach_in_list(ir_instruction, node, list) {
+ visit_tree(node, steal_memory, mem_ctx);
+ }
+ }
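+ /* Usage sketch: after building or cloning instructions under a temporary
+ * ralloc context, something like
+ *
+ *    reparent_ir(instructions, permanent_ctx);
+ *
+ * hands ownership of every node in the list (including nested constant
+ * values and subroutine types) to permanent_ctx. The names here are
+ * placeholders, not a fixed calling convention.
+ */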
+
+
+ static ir_rvalue *
+ try_min_one(ir_rvalue *ir)
+ {
+ ir_expression *expr = ir->as_expression();
+
+ if (!expr || expr->operation != ir_binop_min)
+ return NULL;
+
+ if (expr->operands[0]->is_one())
+ return expr->operands[1];
+
+ if (expr->operands[1]->is_one())
+ return expr->operands[0];
+
+ return NULL;
+ }
+
+ static ir_rvalue *
+ try_max_zero(ir_rvalue *ir)
+ {
+ ir_expression *expr = ir->as_expression();
+
+ if (!expr || expr->operation != ir_binop_max)
+ return NULL;
+
+ if (expr->operands[0]->is_zero())
+ return expr->operands[1];
+
+ if (expr->operands[1]->is_zero())
+ return expr->operands[0];
+
+ return NULL;
+ }
+
+ ir_rvalue *
+ ir_rvalue::as_rvalue_to_saturate()
+ {
+ ir_expression *expr = this->as_expression();
+
+ if (!expr)
+ return NULL;
+
+ ir_rvalue *max_zero = try_max_zero(expr);
+ if (max_zero) {
+ return try_min_one(max_zero);
+ } else {
+ ir_rvalue *min_one = try_min_one(expr);
+ if (min_one) {
+ return try_max_zero(min_one);
+ }
+ }
+
+ return NULL;
+ }
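+ /* For example, given an expression tree for min(max(x, 0.0), 1.0) (or the
+ * equivalent max-of-min form), this returns the inner r-value x so the
+ * caller can replace the whole tree with a saturate of x; any other shape
+ * yields NULL.
+ */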
+
+
+ unsigned
+ vertices_per_prim(GLenum prim)
+ {
+ switch (prim) {
+ case GL_POINTS:
+ return 1;
+ case GL_LINES:
+ return 2;
+ case GL_TRIANGLES:
+ return 3;
+ case GL_LINES_ADJACENCY:
+ return 4;
+ case GL_TRIANGLES_ADJACENCY:
+ return 6;
+ default:
+ assert(!"Bad primitive");
+ return 3;
+ }
+ }
+
+ /**
+ * Generate a string describing the mode of a variable
+ */
+ const char *
+ mode_string(const ir_variable *var)
+ {
+ switch (var->data.mode) {
+ case ir_var_auto:
+ return (var->data.read_only) ? "global constant" : "global variable";
+
+ case ir_var_uniform:
+ return "uniform";
+
+ case ir_var_shader_storage:
+ return "buffer";
+
+ case ir_var_shader_in:
+ return "shader input";
+
+ case ir_var_shader_out:
+ return "shader output";
+
+ case ir_var_function_in:
+ case ir_var_const_in:
+ return "function input";
+
+ case ir_var_function_out:
+ return "function output";
+
+ case ir_var_function_inout:
+ return "function inout";
+
+ case ir_var_system_value:
+ return "shader input";
+
+ case ir_var_temporary:
+ return "compiler temporary";
+
+ case ir_var_mode_count:
+ break;
+ }
+
+ assert(!"Should not get here.");
+ return "invalid variable";
+ }
--- /dev/null
- /**
- * \name Lowered floating point unpacking operations.
- *
- * \see lower_packing_builtins_visitor::split_unpack_half_2x16
- */
- /*@{*/
- ir_unop_unpack_half_2x16_split_x,
- ir_unop_unpack_half_2x16_split_y,
- /*@}*/
-
+ /* -*- c++ -*- */
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #pragma once
+ #ifndef IR_H
+ #define IR_H
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "util/ralloc.h"
+ #include "compiler/glsl_types.h"
+ #include "list.h"
+ #include "ir_visitor.h"
+ #include "ir_hierarchical_visitor.h"
+ #include "main/mtypes.h"
+
+ #ifdef __cplusplus
+
+ /**
+ * \defgroup IR Intermediate representation nodes
+ *
+ * @{
+ */
+
+ /**
+ * Class tags
+ *
+ * Each concrete class derived from \c ir_instruction has a value in this
+ * enumerant. The value for the type is stored in \c ir_instruction::ir_type
+ * by the constructor. While using type tags is not very C++, it is extremely
+ * convenient. For example, during debugging you can simply inspect
+ * \c ir_instruction::ir_type to find out the actual type of the object.
+ *
+ * In addition, it is possible to use a switch-statement based on
+ * \c ir_instruction::ir_type to select different behavior for different object
+ * types. For functions that have only slight differences for several object
+ * types, this allows writing very straightforward, readable code.
+ */
+ enum ir_node_type {
+ ir_type_dereference_array,
+ ir_type_dereference_record,
+ ir_type_dereference_variable,
+ ir_type_constant,
+ ir_type_expression,
+ ir_type_swizzle,
+ ir_type_texture,
+ ir_type_variable,
+ ir_type_assignment,
+ ir_type_call,
+ ir_type_function,
+ ir_type_function_signature,
+ ir_type_if,
+ ir_type_loop,
+ ir_type_loop_jump,
+ ir_type_return,
+ ir_type_discard,
+ ir_type_emit_vertex,
+ ir_type_end_primitive,
+ ir_type_barrier,
+ ir_type_max, /**< maximum ir_type enum number, for validation */
+ ir_type_unset = ir_type_max
+ };
+
+
+ /**
+ * Base class of all IR instructions
+ */
+ class ir_instruction : public exec_node {
+ public:
+ enum ir_node_type ir_type;
+
+ /**
+ * GCC 4.7+ and clang warn when deleting an ir_instruction unless
+ * there's a virtual destructor present. Because we almost
+ * universally use ralloc for our memory management of
+ * ir_instructions, the destructor doesn't need to do any work.
+ */
+ virtual ~ir_instruction()
+ {
+ }
+
+ /** ir_print_visitor helper for debugging. */
+ void print(void) const;
+ void fprint(FILE *f) const;
+
+ virtual void accept(ir_visitor *) = 0;
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *) = 0;
+ virtual ir_instruction *clone(void *mem_ctx,
+ struct hash_table *ht) const = 0;
+
+ bool is_rvalue() const
+ {
+ return ir_type == ir_type_dereference_array ||
+ ir_type == ir_type_dereference_record ||
+ ir_type == ir_type_dereference_variable ||
+ ir_type == ir_type_constant ||
+ ir_type == ir_type_expression ||
+ ir_type == ir_type_swizzle ||
+ ir_type == ir_type_texture;
+ }
+
+ bool is_dereference() const
+ {
+ return ir_type == ir_type_dereference_array ||
+ ir_type == ir_type_dereference_record ||
+ ir_type == ir_type_dereference_variable;
+ }
+
+ bool is_jump() const
+ {
+ return ir_type == ir_type_loop_jump ||
+ ir_type == ir_type_return ||
+ ir_type == ir_type_discard;
+ }
+
+ /**
+ * \name IR instruction downcast functions
+ *
+ * These functions either cast the object to a derived class or return
+ * \c NULL if the object's type does not match the specified derived class.
+ * Additional downcast functions will be added as needed.
+ */
+ /*@{*/
+ #define AS_BASE(TYPE) \
+ class ir_##TYPE *as_##TYPE() \
+ { \
+ assume(this != NULL); \
+ return is_##TYPE() ? (ir_##TYPE *) this : NULL; \
+ } \
+ const class ir_##TYPE *as_##TYPE() const \
+ { \
+ assume(this != NULL); \
+ return is_##TYPE() ? (ir_##TYPE *) this : NULL; \
+ }
+
+ AS_BASE(rvalue)
+ AS_BASE(dereference)
+ AS_BASE(jump)
+ #undef AS_BASE
+
+ #define AS_CHILD(TYPE) \
+ class ir_##TYPE * as_##TYPE() \
+ { \
+ assume(this != NULL); \
+ return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \
+ } \
+ const class ir_##TYPE * as_##TYPE() const \
+ { \
+ assume(this != NULL); \
+ return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \
+ }
+ AS_CHILD(variable)
+ AS_CHILD(function)
+ AS_CHILD(dereference_array)
+ AS_CHILD(dereference_variable)
+ AS_CHILD(dereference_record)
+ AS_CHILD(expression)
+ AS_CHILD(loop)
+ AS_CHILD(assignment)
+ AS_CHILD(call)
+ AS_CHILD(return)
+ AS_CHILD(if)
+ AS_CHILD(swizzle)
+ AS_CHILD(texture)
+ AS_CHILD(constant)
+ AS_CHILD(discard)
+ #undef AS_CHILD
+ /*@}*/
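+ /* Usage sketch: these downcasts replace explicit ir_type checks, e.g.
+ *
+ *    ir_variable *var = inst->as_variable();
+ *    if (var != NULL) {
+ *       // inst really is an ir_variable
+ *    }
+ *
+ * where inst stands for any ir_instruction pointer.
+ */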
+
+ /**
+ * IR equality method: Return true if the referenced instruction would
+ * return the same value as this one.
+ *
+ * This is intended to be used for CSE and algebraic optimizations, on rvalues
+ * in particular. No support for other instruction types (assignments,
+ * jumps, calls, etc.) is planned.
+ */
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ protected:
+ ir_instruction(enum ir_node_type t)
+ : ir_type(t)
+ {
+ }
+
+ private:
+ ir_instruction()
+ {
+ assert(!"Should not get here.");
+ }
+ };
+
+
+ /**
+ * The base class for all "values"/expression trees.
+ */
+ class ir_rvalue : public ir_instruction {
+ public:
+ const struct glsl_type *type;
+
+ virtual ir_rvalue *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ ir_rvalue *as_rvalue_to_saturate();
+
+ virtual bool is_lvalue() const
+ {
+ return false;
+ }
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return NULL;
+ }
+
+
+ /**
+ * If an r-value is a reference to a whole variable, get that variable
+ *
+ * \return
+ * Pointer to a variable that is completely dereferenced by the r-value. If
+ * the r-value is not a dereference or the dereference does not access the
+ * entire variable (i.e., it's just one array element, struct field), \c NULL
+ * is returned.
+ */
+ virtual ir_variable *whole_variable_referenced()
+ {
+ return NULL;
+ }
+
+ /**
+ * Determine if an r-value has the value zero
+ *
+ * The base implementation of this function always returns \c false. The
+ * \c ir_constant class over-rides this function to return \c true \b only
+ * for vector and scalar types that have all elements set to the value
+ * zero (or \c false for booleans).
+ *
+ * \sa ir_constant::has_value, ir_rvalue::is_one, ir_rvalue::is_negative_one
+ */
+ virtual bool is_zero() const;
+
+ /**
+ * Determine if an r-value has the value one
+ *
+ * The base implementation of this function always returns \c false. The
+ * \c ir_constant class over-rides this function to return \c true \b only
+ * for vector and scalar types that have all elements set to the value
+ * one (or \c true for booleans).
+ *
+ * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_negative_one
+ */
+ virtual bool is_one() const;
+
+ /**
+ * Determine if an r-value has the value negative one
+ *
+ * The base implementation of this function always returns \c false. The
+ * \c ir_constant class over-rides this function to return \c true \b only
+ * for vector and scalar types that have all elements set to the value
+ * negative one. For boolean types, the result is always \c false.
+ *
+ * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_one
+ */
+ virtual bool is_negative_one() const;
+
+ /**
+ * Determine if an r-value is an unsigned integer constant which can be
+ * stored in 16 bits.
+ *
+ * \sa ir_constant::is_uint16_constant.
+ */
+ virtual bool is_uint16_constant() const { return false; }
+
+ /**
+ * Return a generic value of error_type.
+ *
+ * Allocation will be performed with 'mem_ctx' as ralloc owner.
+ */
+ static ir_rvalue *error_value(void *mem_ctx);
+
+ protected:
+ ir_rvalue(enum ir_node_type t);
+ };
+
+
+ /**
+ * Variable storage classes
+ */
+ enum ir_variable_mode {
+ ir_var_auto = 0, /**< Function local variables and globals. */
+ ir_var_uniform, /**< Variable declared as a uniform. */
+ ir_var_shader_storage, /**< Variable declared as an ssbo. */
+ ir_var_shader_shared, /**< Variable declared as shared. */
+ ir_var_shader_in,
+ ir_var_shader_out,
+ ir_var_function_in,
+ ir_var_function_out,
+ ir_var_function_inout,
+ ir_var_const_in, /**< "in" param that must be a constant expression */
+ ir_var_system_value, /**< Ex: front-face, instance-id, etc. */
+ ir_var_temporary, /**< Temporary variable generated during compilation. */
+ ir_var_mode_count /**< Number of variable modes */
+ };
+
+ /**
+ * Enum keeping track of how a variable was declared. For error checking of
+ * the gl_PerVertex redeclaration rules.
+ */
+ enum ir_var_declaration_type {
+ /**
+ * Normal declaration (for most variables, this means an explicit
+ * declaration. Exception: temporaries are always implicitly declared, but
+ * they still use ir_var_declared_normally).
+ *
+ * Note: an ir_variable that represents a named interface block uses
+ * ir_var_declared_normally.
+ */
+ ir_var_declared_normally = 0,
+
+ /**
+ * Variable was explicitly declared (or re-declared) in an unnamed
+ * interface block.
+ */
+ ir_var_declared_in_block,
+
+ /**
+ * Variable is an implicitly declared built-in that has not been explicitly
+ * re-declared by the shader.
+ */
+ ir_var_declared_implicitly,
+
+ /**
+ * Variable is implicitly generated by the compiler and should not be
+ * visible via the API.
+ */
+ ir_var_hidden,
+ };
+
+ /**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+ enum ir_depth_layout {
+ ir_depth_layout_none, /**< No depth layout is specified. */
+ ir_depth_layout_any,
+ ir_depth_layout_greater,
+ ir_depth_layout_less,
+ ir_depth_layout_unchanged
+ };
+
+ /**
+ * \brief Convert depth layout qualifier to string.
+ */
+ const char*
+ depth_layout_string(ir_depth_layout layout);
+
+ /**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa ir_variable::state_slots
+ */
+ struct ir_state_slot {
+ int tokens[5];
+ int swizzle;
+ };
+
+
+ /**
+ * Get the string value for an interpolation qualifier
+ *
+ * \return The string that would be used in a shader to specify
+ * \c mode.
+ *
+ * This function is used to generate error messages of the form "shader
+ * uses %s interpolation qualifier", so in the case where there is no
+ * interpolation qualifier, it returns "no".
+ *
+ * This function should only be used on a shader input or output variable.
+ */
+ const char *interpolation_string(unsigned interpolation);
+
+
+ class ir_variable : public ir_instruction {
+ public:
+ ir_variable(const struct glsl_type *, const char *, ir_variable_mode);
+
+ virtual ir_variable *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+
+ /**
+ * Determine how this variable should be interpolated based on its
+ * interpolation qualifier (if present), whether it is gl_Color or
+ * gl_SecondaryColor, and whether flatshading is enabled in the current GL
+ * state.
+ *
+ * The return value will always be either INTERP_QUALIFIER_SMOOTH,
+ * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT.
+ */
+ glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
+
+ /**
+ * Determine whether or not a variable is part of a uniform or
+ * shader storage block.
+ */
+ inline bool is_in_buffer_block() const
+ {
+ return (this->data.mode == ir_var_uniform ||
+ this->data.mode == ir_var_shader_storage) &&
+ this->interface_type != NULL;
+ }
+
+ /**
+ * Determine whether or not a variable is part of a shader storage block.
+ */
+ inline bool is_in_shader_storage_block() const
+ {
+ return this->data.mode == ir_var_shader_storage &&
+ this->interface_type != NULL;
+ }
+
+ /**
+ * Determine whether or not a variable is the declaration of an interface
+ * block
+ *
+ * For the first declaration below, there will be an \c ir_variable named
+ * "instance" whose type and whose instance_type will be the same
+ * \c glsl_type. For the second declaration, there will be an \c ir_variable
+ * named "f" whose type is float and whose instance_type is B2.
+ *
+ * "instance" is an interface instance variable, but "f" is not.
+ *
+ * uniform B1 {
+ * float f;
+ * } instance;
+ *
+ * uniform B2 {
+ * float f;
+ * };
+ */
+ inline bool is_interface_instance() const
+ {
+ return this->type->without_array() == this->interface_type;
+ }
+
+ /**
+ * Set this->interface_type on a newly created variable.
+ */
+ void init_interface_type(const struct glsl_type *type)
+ {
+ assert(this->interface_type == NULL);
+ this->interface_type = type;
+ if (this->is_interface_instance()) {
+ this->u.max_ifc_array_access =
+ rzalloc_array(this, unsigned, type->length);
+ }
+ }
+
+ /**
+ * Change this->interface_type on a variable that previously had a
+ * different, but compatible, interface_type. This is used during linking
+ * to set the size of arrays in interface blocks.
+ */
+ void change_interface_type(const struct glsl_type *type)
+ {
+ if (this->u.max_ifc_array_access != NULL) {
+ /* max_ifc_array_access has already been allocated, so make sure the
+ * new interface has the same number of fields as the old one.
+ */
+ assert(this->interface_type->length == type->length);
+ }
+ this->interface_type = type;
+ }
+
+ /**
+ * Change this->interface_type on a variable that previously had a
+ * different, and incompatible, interface_type. This is used during
+ * compilation to handle redeclaration of the built-in gl_PerVertex
+ * interface block.
+ */
+ void reinit_interface_type(const struct glsl_type *type)
+ {
+ if (this->u.max_ifc_array_access != NULL) {
+ #ifndef NDEBUG
+ /* Redeclaring gl_PerVertex is only allowed if none of the built-ins
+ * it defines have been accessed yet; so it's safe to throw away the
+ * old max_ifc_array_access pointer, since all of its values are
+ * zero.
+ */
+ for (unsigned i = 0; i < this->interface_type->length; i++)
+ assert(this->u.max_ifc_array_access[i] == 0);
+ #endif
+ ralloc_free(this->u.max_ifc_array_access);
+ this->u.max_ifc_array_access = NULL;
+ }
+ this->interface_type = NULL;
+ init_interface_type(type);
+ }
+
+ const glsl_type *get_interface_type() const
+ {
+ return this->interface_type;
+ }
+
+ /**
+ * Get the max_ifc_array_access pointer
+ *
+ * A "set" function is not needed because the array is dynmically allocated
+ * as necessary.
+ */
+ inline unsigned *get_max_ifc_array_access()
+ {
+ assert(this->data._num_state_slots == 0);
+ return this->u.max_ifc_array_access;
+ }
+
+ inline unsigned get_num_state_slots() const
+ {
+ assert(!this->is_interface_instance()
+ || this->data._num_state_slots == 0);
+ return this->data._num_state_slots;
+ }
+
+ inline void set_num_state_slots(unsigned n)
+ {
+ assert(!this->is_interface_instance()
+ || n == 0);
+ this->data._num_state_slots = n;
+ }
+
+ inline ir_state_slot *get_state_slots()
+ {
+ return this->is_interface_instance() ? NULL : this->u.state_slots;
+ }
+
+ inline const ir_state_slot *get_state_slots() const
+ {
+ return this->is_interface_instance() ? NULL : this->u.state_slots;
+ }
+
+ inline ir_state_slot *allocate_state_slots(unsigned n)
+ {
+ assert(!this->is_interface_instance());
+
+ this->u.state_slots = ralloc_array(this, ir_state_slot, n);
+ this->data._num_state_slots = 0;
+
+ if (this->u.state_slots != NULL)
+ this->data._num_state_slots = n;
+
+ return this->u.state_slots;
+ }
+
+ inline bool is_name_ralloced() const
+ {
+ return this->name != ir_variable::tmp_name;
+ }
+
+ /**
+ * Enable emitting extension warnings for this variable
+ */
+ void enable_extension_warning(const char *extension);
+
+ /**
+ * Get the extension warning string for this variable
+ *
+ * If warnings are not enabled, \c NULL is returned.
+ */
+ const char *get_extension_warning() const;
+
+ /**
+ * Declared type of the variable
+ */
+ const struct glsl_type *type;
+
+ /**
+ * Declared name of the variable
+ */
+ const char *name;
+
+ struct ir_variable_data {
+
+ /**
+ * Is the variable read-only?
+ *
+ * This is set for variables declared as \c const, shader inputs,
+ * and uniforms.
+ */
+ unsigned read_only:1;
+ unsigned centroid:1;
+ unsigned sample:1;
+ unsigned patch:1;
+ unsigned invariant:1;
+ unsigned precise:1;
+
+ /**
+ * Has this variable been used for reading or writing?
+ *
+ * Several GLSL semantic checks require knowledge of whether or not a
+ * variable has been used. For example, it is an error to redeclare a
+ * variable as invariant after it has been used.
+ *
+ * This is only maintained in the ast_to_hir.cpp path, not in
+ * Mesa's fixed function or ARB program paths.
+ */
+ unsigned used:1;
+
+ /**
+ * Has this variable been statically assigned?
+ *
+ * This answers whether the variable was assigned in any path of
+ * the shader during ast_to_hir. This doesn't answer whether it is
+ * still written after dead code removal, nor is it maintained in
+ * non-ast_to_hir.cpp (GLSL parsing) paths.
+ */
+ unsigned assigned:1;
+
+ /**
+ * When separate shader programs are enabled, only inputs/outputs between
+ * the stages of a multi-stage separate program can be safely removed
+ * from the shader interface. Other inputs/outputs must remain active.
+ */
+ unsigned always_active_io:1;
+
+ /**
+ * Enum indicating how the variable was declared. See
+ * ir_var_declaration_type.
+ *
+ * This is used to detect certain kinds of illegal variable redeclarations.
+ */
+ unsigned how_declared:2;
+
+ /**
+ * Storage class of the variable.
+ *
+ * \sa ir_variable_mode
+ */
+ unsigned mode:4;
+
+ /**
+ * Interpolation mode for shader inputs / outputs
+ *
+ * \sa ir_variable_interpolation
+ */
+ unsigned interpolation:2;
+
+ /**
+ * \name ARB_fragment_coord_conventions
+ * @{
+ */
+ unsigned origin_upper_left:1;
+ unsigned pixel_center_integer:1;
+ /*@}*/
+
+ /**
+ * Was the location explicitly set in the shader?
+ *
+ * If the location is explicitly set in the shader, it \b cannot be changed
+ * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+ * no effect).
+ */
+ unsigned explicit_location:1;
+ unsigned explicit_index:1;
+
+ /**
+ * Was an initial binding explicitly set in the shader?
+ *
+ * If so, constant_value contains an integer ir_constant representing the
+ * initial binding point.
+ */
+ unsigned explicit_binding:1;
+
+ /**
+ * Does this variable have an initializer?
+ *
+ * This is used by the linker to cross-validate initializers of global
+ * variables.
+ */
+ unsigned has_initializer:1;
+
+ /**
+ * Is this variable a generic output or input that has not yet been matched
+ * up to a variable in another stage of the pipeline?
+ *
+ * This is used by the linker as scratch storage while assigning locations
+ * to generic inputs and outputs.
+ */
+ unsigned is_unmatched_generic_inout:1;
+
+ /**
+ * If non-zero, then this variable may be packed along with other variables
+ * into a single varying slot, so this offset should be applied when
+ * accessing components. For example, an offset of 1 means that the x
+ * component of this variable is actually stored in component y of the
+ * location specified by \c location.
+ */
+ unsigned location_frac:2;
+
+ /**
+ * Layout of the matrix. Uses glsl_matrix_layout values.
+ */
+ unsigned matrix_layout:2;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was not an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_array will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_nonarray:1;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_nonarray will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_array:1;
+
+ /**
+ * Non-zero if the variable must be a shader input. This is useful for
+ * constraints on function parameters.
+ */
+ unsigned must_be_shader_input:1;
+
+ /**
+ * Output index for dual source blending.
+ *
+ * \note
+ * The GLSL spec only allows the values 0 or 1 for the index in \b dual
+ * source blending.
+ */
+ unsigned index:1;
+
+ /**
+ * Precision qualifier.
+ *
+ * In desktop GLSL we do not care about precision qualifiers at all; in
+ * fact, the spec says that precision qualifiers are ignored.
+ *
+ * To keep things simple, this field is always GLSL_PRECISION_NONE on
+ * desktop shaders. That way every variable has the same precision
+ * value, and the checks the compiler adds for this field will never
+ * break a desktop shader compile.
+ */
+ unsigned precision:2;
+
+ /**
+ * \brief Layout qualifier for gl_FragDepth.
+ *
+ * This is not equal to \c ir_depth_layout_none if and only if this
+ * variable is \c gl_FragDepth and a layout qualifier is specified.
+ */
+ ir_depth_layout depth_layout:3;
+
+ /**
+ * ARB_shader_image_load_store qualifiers.
+ */
+ unsigned image_read_only:1; /**< "readonly" qualifier. */
+ unsigned image_write_only:1; /**< "writeonly" qualifier. */
+ unsigned image_coherent:1;
+ unsigned image_volatile:1;
+ unsigned image_restrict:1;
+
+ /**
+ * ARB_shader_storage_buffer_object
+ */
+ unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */
+
+ /**
+ * Emit a warning if this variable is accessed.
+ */
+ private:
+ uint8_t warn_extension_index;
+
+ public:
+ /** Image internal format if specified explicitly, otherwise GL_NONE. */
+ uint16_t image_format;
+
+ private:
+ /**
+ * Number of state slots used
+ *
+ * \note
+ * This could be stored in as few as 7 bits, if necessary. If it is made
+ * smaller, add an assertion to \c ir_variable::allocate_state_slots to
+ * be safe.
+ */
+ uint16_t _num_state_slots;
+
+ public:
+ /**
+ * Initial binding point for a sampler, atomic, or UBO.
+ *
+ * For array types, this represents the binding point for the first element.
+ */
+ int16_t binding;
+
+ /**
+ * Storage location of the base of this variable
+ *
+ * The precise meaning of this field depends on the nature of the variable.
+ *
+ * - Vertex shader input: one of the values from \c gl_vert_attrib.
+ * - Vertex shader output: one of the values from \c gl_varying_slot.
+ * - Geometry shader input: one of the values from \c gl_varying_slot.
+ * - Geometry shader output: one of the values from \c gl_varying_slot.
+ * - Fragment shader input: one of the values from \c gl_varying_slot.
+ * - Fragment shader output: one of the values from \c gl_frag_result.
+ * - Uniforms: Per-stage uniform slot number for default uniform block.
+ * - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: explicit location until linking then reused to
+ * store uniform slot number.
+ * - Other: This field is not currently used.
+ *
+ * If the variable is a uniform, shader input, or shader output, and the
+ * slot has not been assigned, the value will be -1.
+ */
+ int location;
+
+ /**
+ * Vertex stream output identifier.
+ */
+ unsigned stream;
+
+ /**
+ * Location an atomic counter is stored at.
+ */
+ unsigned offset;
+
+ /**
+ * Highest element accessed with a constant expression array index
+ *
+ * Not used for non-array variables.
+ */
+ unsigned max_array_access;
+
+ /**
+ * Allow (only) ir_variable direct access private members.
+ */
+ friend class ir_variable;
+ } data;
+
+ /**
+ * Value assigned in the initializer of a variable declared "const"
+ */
+ ir_constant *constant_value;
+
+ /**
+ * Constant expression assigned in the initializer of the variable
+ *
+ * \warning
+ * This field and \c ::constant_value are distinct. Even if the two fields
+ * refer to constants with the same value, they must point to separate
+ * objects.
+ */
+ ir_constant *constant_initializer;
+
+ private:
+ static const char *const warn_extension_table[];
+
+ union {
+ /**
+ * For variables which satisfy the is_interface_instance() predicate,
+ * this points to an array of integers such that if the ith member of
+ * the interface block is an array, max_ifc_array_access[i] is the
+ * maximum array element of that member that has been accessed. If the
+ * ith member of the interface block is not an array,
+ * max_ifc_array_access[i] is unused.
+ *
+ * For variables whose type is not an interface block, this pointer is
+ * NULL.
+ */
+ unsigned *max_ifc_array_access;
+
+ /**
+ * Built-in state that backs this uniform
+ *
+ * Once set at variable creation, \c state_slots must remain invariant.
+ *
+ * If the variable is not a uniform, \c _num_state_slots will be zero
+ * and \c state_slots will be \c NULL.
+ */
+ ir_state_slot *state_slots;
+ } u;
+
+ /**
+ * For variables that are in an interface block or are an instance of an
+ * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+ *
+ * \sa ir_variable::location
+ */
+ const glsl_type *interface_type;
+
+ /**
+ * Name used for anonymous compiler temporaries
+ */
+ static const char tmp_name[];
+
+ public:
+ /**
+ * Should the constructor keep names for ir_var_temporary variables?
+ *
+ * When this global is false, names passed to the constructor for
+ * \c ir_var_temporary variables will be dropped. Instead, the variable will
+ * be named "compiler_temp". This name will be in static storage.
+ *
+ * \warning
+ * \b NEVER change the mode of an \c ir_var_temporary.
+ *
+ * \warning
+ * This variable is \b not thread-safe. It is global, \b not
+ * per-context. It begins life false. A context can, at some point, make
+ * it true. From that point on, it will be true forever. This should be
+ * okay since it will only be set true while debugging.
+ */
+ static bool temporaries_allocate_names;
+ };
+
+ /**
+ * A function that returns whether a built-in function is available in the
+ * current shading language (based on version, ES or desktop, and extensions).
+ */
+ typedef bool (*builtin_available_predicate)(const _mesa_glsl_parse_state *);
+
+ /*@{*/
+ /**
+ * The representation of a function instance; may be the full definition or
+ * simply a prototype.
+ */
+ class ir_function_signature : public ir_instruction {
+ /* An ir_function_signature will be part of the list of signatures in
+ * an ir_function.
+ */
+ public:
+ ir_function_signature(const glsl_type *return_type,
+ builtin_available_predicate builtin_avail = NULL);
+
+ virtual ir_function_signature *clone(void *mem_ctx,
+ struct hash_table *ht) const;
+ ir_function_signature *clone_prototype(void *mem_ctx,
+ struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Attempt to evaluate this function as a constant expression,
+ * given a list of the actual parameters and the variable context.
+ * Returns NULL for non-built-ins.
+ */
+ ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context);
+
+ /**
+ * Get the name of the function for which this is a signature
+ */
+ const char *function_name() const;
+
+ /**
+ * Get a handle to the function for which this is a signature
+ *
+ * There is no setter function, this function returns a \c const pointer,
+ * and \c ir_function_signature::_function is private for a reason. The
+ * only way to make a connection between a function and function signature
+ * is via \c ir_function::add_signature. This helps ensure that certain
+ * invariants (i.e., a function signature is in the list of signatures for
+ * its \c _function) are met.
+ *
+ * \sa ir_function::add_signature
+ */
+ inline const class ir_function *function() const
+ {
+ return this->_function;
+ }
+
+ /**
+ * Check whether the qualifiers match between this signature's parameters
+ * and the supplied parameter list. If not, returns the name of the first
+ * parameter with mismatched qualifiers (for use in error messages).
+ */
+ const char *qualifiers_match(exec_list *params);
+
+ /**
+ * Replace the current parameter list with the given one. This is useful
+ * if the current information came from a prototype, and either has invalid
+ * or missing parameter names.
+ */
+ void replace_parameters(exec_list *new_params);
+
+ /**
+ * Function return type.
+ *
+ * \note This discards the optional precision qualifier.
+ */
+ const struct glsl_type *return_type;
+
+ /**
+ * List of ir_variable of function parameters.
+ *
+ * This represents the storage. The parameters passed in a particular
+ * call will be in ir_call::actual_parameters.
+ */
+ struct exec_list parameters;
+
+ /** Whether or not this function has a body (which may be empty). */
+ unsigned is_defined:1;
+
+ /** Whether or not this function signature is a built-in. */
+ bool is_builtin() const;
+
+ /**
+ * Whether or not this function is an intrinsic to be implemented
+ * by the driver.
+ */
+ bool is_intrinsic;
+
+ /** Whether or not a built-in is available for this shader. */
+ bool is_builtin_available(const _mesa_glsl_parse_state *state) const;
+
+ /** Body of instructions in the function. */
+ struct exec_list body;
+
+ private:
+ /**
+ * A function pointer to a predicate that answers whether a built-in
+ * function is available in the current shader. NULL if not a built-in.
+ */
+ builtin_available_predicate builtin_avail;
+
+ /** Function of which this signature is one overload. */
+ class ir_function *_function;
+
+ /** Function signature of which this one is a prototype clone */
+ const ir_function_signature *origin;
+
+ friend class ir_function;
+
+ /**
+ * Helper function to run a list of instructions for constant
+ * expression evaluation.
+ *
+ * The hash table represents the values of the visible variables.
+ * There are no scoping issues because the table is indexed on
+ * ir_variable pointers, not variable names.
+ *
+ * Returns false if the expression is not constant, true otherwise,
+ * and the value in *result if result is non-NULL.
+ */
+ bool constant_expression_evaluate_expression_list(const struct exec_list &body,
+ struct hash_table *variable_context,
+ ir_constant **result);
+ };
+
+
+ /**
+ * Header for tracking multiple overloaded functions with the same name.
+ * Contains a list of ir_function_signatures representing each of the
+ * actual functions.
+ */
+ class ir_function : public ir_instruction {
+ public:
+ ir_function(const char *name);
+
+ virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ void add_signature(ir_function_signature *sig)
+ {
+ sig->_function = this;
+ this->signatures.push_tail(sig);
+ }
+
+ /**
+ * Find a signature that matches a set of actual parameters, taking implicit
+ * conversions into account. Also flags whether the match was exact.
+ */
+ ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
+ const exec_list *actual_param,
+ bool allow_builtins,
+ bool *match_is_exact);
+
+ /**
+ * Find a signature that matches a set of actual parameters, taking implicit
+ * conversions into account.
+ */
+ ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
+ const exec_list *actual_param,
+ bool allow_builtins);
+
+ /**
+ * Find a signature that exactly matches a set of actual parameters without
+ * any implicit type conversions.
+ */
+ ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state,
+ const exec_list *actual_ps);
+
+ /**
+ * Name of the function.
+ */
+ const char *name;
+
+ /** Whether or not this function has a signature that isn't a built-in. */
+ bool has_user_signature();
+
+ /**
+ * List of ir_function_signature for each overloaded function with this name.
+ */
+ struct exec_list signatures;
+
+ /**
+ * Is this function a subroutine type declaration?
+ * e.g. subroutine void type1(float arg1);
+ */
+ bool is_subroutine;
+
+ /**
+ * Is this function associated with a subroutine type?
+ * e.g. subroutine (type1, type2) function_name { function_body };
+ * would have num_subroutine_types 2,
+ * and pointers to the type1 and type2 types.
+ */
+ int num_subroutine_types;
+ const struct glsl_type **subroutine_types;
+
+ int subroutine_index;
+ };
+
+ inline const char *ir_function_signature::function_name() const
+ {
+ return this->_function->name;
+ }
+ /*@}*/
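+
+ /* A minimal usage sketch (assuming a ralloc context \c mem_ctx, a parse
+  * state \c state and a populated parameter list \c params, all hypothetical
+  * names): overloads live as signatures under a single ir_function, and
+  * matching_signature() resolves a call against them, applying implicit
+  * conversions where needed.
+  *
+  * \code
+  *    ir_function *f = new(mem_ctx) ir_function("my_func");
+  *    ir_function_signature *sig =
+  *       new(mem_ctx) ir_function_signature(glsl_type::float_type);
+  *    f->add_signature(sig);
+  *    ir_function_signature *match =
+  *       f->matching_signature(state, &params, false);
+  * \endcode
+  */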
+
+
+ /**
+ * IR instruction representing high-level if-statements
+ */
+ class ir_if : public ir_instruction {
+ public:
+ ir_if(ir_rvalue *condition)
+ : ir_instruction(ir_type_if), condition(condition)
+ {
+ }
+
+ virtual ir_if *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *condition;
+ /** List of ir_instruction for the body of the then branch */
+ exec_list then_instructions;
+ /** List of ir_instruction for the body of the else branch */
+ exec_list else_instructions;
+ };
+
+
+ /**
+ * IR instruction representing a high-level loop structure.
+ */
+ class ir_loop : public ir_instruction {
+ public:
+ ir_loop();
+
+ virtual ir_loop *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /** List of ir_instruction that make up the body of the loop. */
+ exec_list body_instructions;
+ };
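+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx and an
+  * already-built boolean r-value \c cond): control flow is expressed by
+  * nesting instruction lists inside ir_if and ir_loop nodes, e.g. a loop
+  * that breaks when the condition holds.
+  *
+  * \code
+  *    ir_if *branch = new(mem_ctx) ir_if(cond);
+  *    branch->then_instructions.push_tail(
+  *       new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break));
+  *    ir_loop *loop = new(mem_ctx) ir_loop();
+  *    loop->body_instructions.push_tail(branch);
+  * \endcode
+  */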
+
+
+ class ir_assignment : public ir_instruction {
+ public:
+ ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL);
+
+ /**
+ * Construct an assignment with an explicit write mask
+ *
+ * \note
+ * Since a write mask is supplied, the LHS must already be a bare
+ * \c ir_dereference. There cannot be any swizzles in the LHS.
+ */
+ ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, ir_rvalue *condition,
+ unsigned write_mask);
+
+ virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Get a whole variable written by an assignment
+ *
+ * If the LHS of the assignment writes a whole variable, the variable is
+ * returned. Otherwise \c NULL is returned. Examples of whole-variable
+ * assignment are:
+ *
+ * - Assigning to a scalar
+ * - Assigning to all components of a vector
+ * - Whole array (or matrix) assignment
+ * - Whole structure assignment
+ */
+ ir_variable *whole_variable_written();
+
+ /**
+ * Set the LHS of an assignment
+ */
+ void set_lhs(ir_rvalue *lhs);
+
+ /**
+ * Left-hand side of the assignment.
+ *
+ * This should be treated as read only. If you need to set the LHS of an
+ * assignment, use \c ir_assignment::set_lhs.
+ */
+ ir_dereference *lhs;
+
+ /**
+ * Value being assigned
+ */
+ ir_rvalue *rhs;
+
+ /**
+ * Optional condition for the assignment.
+ */
+ ir_rvalue *condition;
+
+
+ /**
+ * Component mask written
+ *
+ * For non-vector types in the LHS, this field will be zero. For vector
+ * types, a bit will be set for each component that is written. Note that
+ * for \c vec2 and \c vec3 types only the lower bits will ever be set.
+ *
+ * A partially-set write mask means that each enabled channel gets
+ * the value from a consecutive channel of the rhs. For example,
+ * to write just .xyw of gl_FragColor with color:
+ *
+ * (assign (constant bool (1)) (xyw)
+ * (var_ref gl_FragColor)
+ * (swiz xyw (var_ref color)))
+ */
+ unsigned write_mask:4;
+ };
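+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx and an
+  * existing vec4 variable \c dst): the write-mask constructor takes a bare
+  * dereference as the LHS and one mask bit per component to be written, with
+  * the RHS supplying one value per enabled bit.
+  *
+  * \code
+  *    ir_dereference_variable *lhs = new(mem_ctx) ir_dereference_variable(dst);
+  *    ir_rvalue *rhs = new(mem_ctx) ir_constant(1.0f, 3);
+  *    // Write only .xyz of dst (mask bits 0, 1 and 2).
+  *    ir_assignment *assign =
+  *       new(mem_ctx) ir_assignment(lhs, rhs, NULL, 0x7);
+  * \endcode
+  */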
+
+ /* Update ir_expression::get_num_operands() and operator_strs when
+ * updating this list.
+ */
+ enum ir_expression_operation {
+ ir_unop_bit_not,
+ ir_unop_logic_not,
+ ir_unop_neg,
+ ir_unop_abs,
+ ir_unop_sign,
+ ir_unop_rcp,
+ ir_unop_rsq,
+ ir_unop_sqrt,
+ ir_unop_exp, /**< Log base e on gentype */
+ ir_unop_log, /**< Natural log on gentype */
+ ir_unop_exp2,
+ ir_unop_log2,
+ ir_unop_f2i, /**< Float-to-integer conversion. */
+ ir_unop_f2u, /**< Float-to-unsigned conversion. */
+ ir_unop_i2f, /**< Integer-to-float conversion. */
+ ir_unop_f2b, /**< Float-to-boolean conversion */
+ ir_unop_b2f, /**< Boolean-to-float conversion */
+ ir_unop_i2b, /**< int-to-boolean conversion */
+ ir_unop_b2i, /**< Boolean-to-int conversion */
+ ir_unop_u2f, /**< Unsigned-to-float conversion. */
+ ir_unop_i2u, /**< Integer-to-unsigned conversion. */
+ ir_unop_u2i, /**< Unsigned-to-integer conversion. */
+ ir_unop_d2f, /**< Double-to-float conversion. */
+ ir_unop_f2d, /**< Float-to-double conversion. */
+ ir_unop_d2i, /**< Double-to-integer conversion. */
+ ir_unop_i2d, /**< Integer-to-double conversion. */
+ ir_unop_d2u, /**< Double-to-unsigned conversion. */
+ ir_unop_u2d, /**< Unsigned-to-double conversion. */
+ ir_unop_d2b, /**< Double-to-boolean conversion. */
+ ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */
+ ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */
+ ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */
+ ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */
+
+ /**
+ * \name Unary floating-point rounding operations.
+ */
+ /*@{*/
+ ir_unop_trunc,
+ ir_unop_ceil,
+ ir_unop_floor,
+ ir_unop_fract,
+ ir_unop_round_even,
+ /*@}*/
+
+ /**
+ * \name Trigonometric operations.
+ */
+ /*@{*/
+ ir_unop_sin,
+ ir_unop_cos,
+ /*@}*/
+
+ /**
+ * \name Partial derivatives.
+ */
+ /*@{*/
+ ir_unop_dFdx,
+ ir_unop_dFdx_coarse,
+ ir_unop_dFdx_fine,
+ ir_unop_dFdy,
+ ir_unop_dFdy_coarse,
+ ir_unop_dFdy_fine,
+ /*@}*/
+
+ /**
+ * \name Floating point pack and unpack operations.
+ */
+ /*@{*/
+ ir_unop_pack_snorm_2x16,
+ ir_unop_pack_snorm_4x8,
+ ir_unop_pack_unorm_2x16,
+ ir_unop_pack_unorm_4x8,
+ ir_unop_pack_half_2x16,
+ ir_unop_unpack_snorm_2x16,
+ ir_unop_unpack_snorm_4x8,
+ ir_unop_unpack_unorm_2x16,
+ ir_unop_unpack_unorm_4x8,
+ ir_unop_unpack_half_2x16,
+ /*@}*/
+
- /**
- * \name Lowered floating point packing operations.
- *
- * \see lower_packing_builtins_visitor::split_pack_half_2x16
- */
- /*@{*/
- ir_binop_pack_half_2x16_split,
- /*@}*/
-
+ /**
+ * \name Bit operations, part of ARB_gpu_shader5.
+ */
+ /*@{*/
+ ir_unop_bitfield_reverse,
+ ir_unop_bit_count,
+ ir_unop_find_msb,
+ ir_unop_find_lsb,
+ /*@}*/
+
+ ir_unop_saturate,
+
+ /**
+ * \name Double packing, part of ARB_gpu_shader_fp64.
+ */
+ /*@{*/
+ ir_unop_pack_double_2x32,
+ ir_unop_unpack_double_2x32,
+ /*@}*/
+
+ ir_unop_frexp_sig,
+ ir_unop_frexp_exp,
+
+ ir_unop_noise,
+
+ ir_unop_subroutine_to_int,
+ /**
+ * Interpolate fs input at centroid
+ *
+ * operand0 is the fs input.
+ */
+ ir_unop_interpolate_at_centroid,
+
+ /**
+ * Ask the driver for the total size of a buffer block.
+ *
+ * operand0 is the ir_constant buffer block index in the linked shader.
+ */
+ ir_unop_get_buffer_size,
+
+ /**
+ * Calculate the length of an unsized array inside a buffer block.
+ * This opcode is replaced by a lowering pass inside the linker.
+ *
+ * operand0 is the unsized array's ir_value for the calculation
+ * of its length.
+ */
+ ir_unop_ssbo_unsized_array_length,
+
+ /**
+ * A sentinel marking the last of the unary operations.
+ */
+ ir_last_unop = ir_unop_ssbo_unsized_array_length,
+
+ ir_binop_add,
+ ir_binop_sub,
+ ir_binop_mul, /**< Floating-point or low 32-bit integer multiply. */
+ ir_binop_imul_high, /**< Calculates the high 32-bits of a 64-bit multiply. */
+ ir_binop_div,
+
+ /**
+ * Returns the carry resulting from the addition of the two arguments.
+ */
+ /*@{*/
+ ir_binop_carry,
+ /*@}*/
+
+ /**
+ * Returns the borrow resulting from the subtraction of the second argument
+ * from the first argument.
+ */
+ /*@{*/
+ ir_binop_borrow,
+ /*@}*/
+
+ /**
+ * Takes one of two combinations of arguments:
+ *
+ * - mod(vecN, vecN)
+ * - mod(vecN, float)
+ *
+ * Does not take integer types.
+ */
+ ir_binop_mod,
+
+ /**
+ * \name Binary comparison operators which return a boolean vector.
+ * The type of both operands must be equal.
+ */
+ /*@{*/
+ ir_binop_less,
+ ir_binop_greater,
+ ir_binop_lequal,
+ ir_binop_gequal,
+ ir_binop_equal,
+ ir_binop_nequal,
+ /**
+ * Returns single boolean for whether all components of operands[0]
+ * equal the components of operands[1].
+ */
+ ir_binop_all_equal,
+ /**
+ * Returns single boolean for whether any component of operands[0]
+ * is not equal to the corresponding component of operands[1].
+ */
+ ir_binop_any_nequal,
+ /*@}*/
+
+ /**
+ * \name Bit-wise binary operations.
+ */
+ /*@{*/
+ ir_binop_lshift,
+ ir_binop_rshift,
+ ir_binop_bit_and,
+ ir_binop_bit_xor,
+ ir_binop_bit_or,
+ /*@}*/
+
+ ir_binop_logic_and,
+ ir_binop_logic_xor,
+ ir_binop_logic_or,
+
+ ir_binop_dot,
+ ir_binop_min,
+ ir_binop_max,
+
+ ir_binop_pow,
+
+ /**
+ * Load a value the size of a given GLSL type from a uniform block.
+ *
+ * operand0 is the ir_constant uniform block index in the linked shader.
+ * operand1 is a byte offset within the uniform block.
+ */
+ ir_binop_ubo_load,
+
+ /**
+ * \name Multiplies a number by two to a power, part of ARB_gpu_shader5.
+ */
+ /*@{*/
+ ir_binop_ldexp,
+ /*@}*/
+
+ /**
+ * Extract a scalar from a vector
+ *
+ * operand0 is the vector
+ * operand1 is the index of the field to read from operand0
+ */
+ ir_binop_vector_extract,
+
+ /**
+ * Interpolate fs input at offset
+ *
+ * operand0 is the fs input
+ * operand1 is the offset from the pixel center
+ */
+ ir_binop_interpolate_at_offset,
+
+ /**
+ * Interpolate fs input at sample position
+ *
+ * operand0 is the fs input
+ * operand1 is the sample ID
+ */
+ ir_binop_interpolate_at_sample,
+
+ /**
+ * A sentinel marking the last of the binary operations.
+ */
+ ir_last_binop = ir_binop_interpolate_at_sample,
+
+ /**
+ * \name Fused floating-point multiply-add, part of ARB_gpu_shader5.
+ */
+ /*@{*/
+ ir_triop_fma,
+ /*@}*/
+
+ ir_triop_lrp,
+
+ /**
+ * \name Conditional Select
+ *
+ * A vector conditional select instruction (like ?:, but operating per-
+ * component on vectors).
+ *
+ * \see lower_instructions_visitor::ldexp_to_arith
+ */
+ /*@{*/
+ ir_triop_csel,
+ /*@}*/
+
+ ir_triop_bitfield_extract,
+
+ /**
+ * Generate a value with one field of a vector changed
+ *
+ * operand0 is the vector
+ * operand1 is the value to write into the vector result
+ * operand2 is the index in operand0 to be modified
+ */
+ ir_triop_vector_insert,
+
+ /**
+ * A sentinel marking the last of the ternary operations.
+ */
+ ir_last_triop = ir_triop_vector_insert,
+
+ ir_quadop_bitfield_insert,
+
+ ir_quadop_vector,
+
+ /**
+ * A sentinel marking the last of the quaternary operations.
+ */
+ ir_last_quadop = ir_quadop_vector,
+
+ /**
+ * A sentinel marking the last of all operations.
+ */
+ ir_last_opcode = ir_quadop_vector
+ };
+
+ class ir_expression : public ir_rvalue {
+ public:
+ ir_expression(int op, const struct glsl_type *type,
+ ir_rvalue *op0, ir_rvalue *op1 = NULL,
+ ir_rvalue *op2 = NULL, ir_rvalue *op3 = NULL);
+
+ /**
+ * Constructor for unary operation expressions
+ */
+ ir_expression(int op, ir_rvalue *);
+
+ /**
+ * Constructor for binary operation expressions
+ */
+ ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1);
+
+ /**
+ * Constructor for ternary operation expressions
+ */
+ ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ /**
+ * Attempt to constant-fold the expression
+ *
+ * The "variable_context" hash table links ir_variable * to ir_constant *
+ * that represent the variables' values. \c NULL represents an empty
+ * context.
+ *
+ * If the expression cannot be constant folded, this method will return
+ * \c NULL.
+ */
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ /**
+ * Determine the number of operands used by an expression
+ */
+ static unsigned int get_num_operands(ir_expression_operation);
+
+ /**
+ * Determine the number of operands used by an expression
+ */
+ unsigned int get_num_operands() const
+ {
+ return (this->operation == ir_quadop_vector)
+ ? this->type->vector_elements : get_num_operands(operation);
+ }
+
+ /**
+ * Return whether the expression operates on vectors horizontally.
+ */
+ bool is_horizontal() const
+ {
+ return operation == ir_binop_all_equal ||
+ operation == ir_binop_any_nequal ||
+ operation == ir_binop_dot ||
+ operation == ir_binop_vector_extract ||
+ operation == ir_triop_vector_insert ||
+ operation == ir_quadop_vector;
+ }
+
+ /**
+ * Return a string representing this expression's operator.
+ */
+ const char *operator_string();
+
+ /**
+ * Return a string representing this expression's operator.
+ */
+ static const char *operator_string(ir_expression_operation);
+
+
+ /**
+ * Do a reverse-lookup to translate the given string into an operator.
+ */
+ static ir_expression_operation get_operator(const char *);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual ir_variable *variable_referenced() const;
+
+ ir_expression_operation operation;
+ ir_rvalue *operands[4];
+ };
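+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx and two
+  * float r-values \c a and \c b): the one- and two-operand constructors infer
+  * the result type from their operands, so the explicit type is only needed
+  * for the general constructor.
+  *
+  * \code
+  *    ir_expression *sum = new(mem_ctx) ir_expression(ir_binop_add, a, b);
+  *    ir_expression *neg = new(mem_ctx) ir_expression(ir_unop_neg, sum);
+  *    assert(neg->get_num_operands() == 1);
+  * \endcode
+  */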
+
+
+ /**
+ * HIR instruction representing a high-level function call, containing a list
+ * of parameters and returning a value in the supplied temporary.
+ */
+ class ir_call : public ir_instruction {
+ public:
+ ir_call(ir_function_signature *callee,
+ ir_dereference_variable *return_deref,
+ exec_list *actual_parameters)
+ : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL)
+ {
+ assert(callee->return_type != NULL);
+ actual_parameters->move_nodes_to(& this->actual_parameters);
+ this->use_builtin = callee->is_builtin();
+ }
+
+ ir_call(ir_function_signature *callee,
+ ir_dereference_variable *return_deref,
+ exec_list *actual_parameters,
+ ir_variable *var, ir_rvalue *array_idx)
+ : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx)
+ {
+ assert(callee->return_type != NULL);
+ actual_parameters->move_nodes_to(& this->actual_parameters);
+ this->use_builtin = callee->is_builtin();
+ }
+
+ virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Get the name of the function being called.
+ */
+ const char *callee_name() const
+ {
+ return callee->function_name();
+ }
+
+ /**
+ * Generates an inline version of the function before @ir,
+ * storing the return value in return_deref.
+ */
+ void generate_inline(ir_instruction *ir);
+
+ /**
+ * Storage for the function's return value.
+ * This must be NULL if the return type is void.
+ */
+ ir_dereference_variable *return_deref;
+
+ /**
+ * The specific function signature being called.
+ */
+ ir_function_signature *callee;
+
+ /* List of ir_rvalue of paramaters passed in this call. */
+ exec_list actual_parameters;
+
+ /** Should this call only bind to a built-in function? */
+ bool use_builtin;
+
+ /*
+ * ARB_shader_subroutine support -
+ * the subroutine uniform variable and array index
+ * rvalue to be used in the lowering pass later.
+ */
+ ir_variable *sub_var;
+ ir_rvalue *array_idx;
+ };
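+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx, a
+  * non-void signature \c sig, a result variable \c result_var, an argument
+  * list \c args and an instruction list \c instructions, all hypothetical
+  * names): the actual parameters are moved out of the caller's list into
+  * ir_call::actual_parameters.
+  *
+  * \code
+  *    ir_dereference_variable *ret =
+  *       new(mem_ctx) ir_dereference_variable(result_var);
+  *    ir_call *call = new(mem_ctx) ir_call(sig, ret, &args);
+  *    instructions->push_tail(call);
+  * \endcode
+  */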
+
+
+ /**
+ * \name Jump-like IR instructions.
+ *
+ * These include \c break, \c continue, \c return, and \c discard.
+ */
+ /*@{*/
+ class ir_jump : public ir_instruction {
+ protected:
+ ir_jump(enum ir_node_type t)
+ : ir_instruction(t)
+ {
+ }
+ };
+
+ class ir_return : public ir_jump {
+ public:
+ ir_return()
+ : ir_jump(ir_type_return), value(NULL)
+ {
+ }
+
+ ir_return(ir_rvalue *value)
+ : ir_jump(ir_type_return), value(value)
+ {
+ }
+
+ virtual ir_return *clone(void *mem_ctx, struct hash_table *) const;
+
+ ir_rvalue *get_value() const
+ {
+ return value;
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *value;
+ };
+
+
+ /**
+ * Jump instructions used inside loops
+ *
+ * These include \c break and \c continue. The \c break within a loop is
+ * different from the \c break within a switch-statement.
+ *
+ * \sa ir_switch_jump
+ */
+ class ir_loop_jump : public ir_jump {
+ public:
+ enum jump_mode {
+ jump_break,
+ jump_continue
+ };
+
+ ir_loop_jump(jump_mode mode)
+ : ir_jump(ir_type_loop_jump)
+ {
+ this->mode = mode;
+ }
+
+ virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ bool is_break() const
+ {
+ return mode == jump_break;
+ }
+
+ bool is_continue() const
+ {
+ return mode == jump_continue;
+ }
+
+ /** Mode selector for the jump instruction. */
+ enum jump_mode mode;
+ };
+
+ /**
+ * IR instruction representing discard statements.
+ */
+ class ir_discard : public ir_jump {
+ public:
+ ir_discard()
+ : ir_jump(ir_type_discard)
+ {
+ this->condition = NULL;
+ }
+
+ ir_discard(ir_rvalue *cond)
+ : ir_jump(ir_type_discard)
+ {
+ this->condition = cond;
+ }
+
+ virtual ir_discard *clone(void *mem_ctx, struct hash_table *ht) const;
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *condition;
+ };
+ /*@}*/
+
+
+ /**
+ * Texture sampling opcodes used in ir_texture
+ */
+ enum ir_texture_opcode {
+ ir_tex, /**< Regular texture look-up */
+ ir_txb, /**< Texture look-up with LOD bias */
+ ir_txl, /**< Texture look-up with explicit LOD */
+ ir_txd, /**< Texture look-up with partial derivatives */
+ ir_txf, /**< Texel fetch with explicit LOD */
+ ir_txf_ms, /**< Multisample texture fetch */
+ ir_txs, /**< Texture size */
+ ir_lod, /**< Texture lod query */
+ ir_tg4, /**< Texture gather */
+ ir_query_levels, /**< Texture levels query */
+ ir_texture_samples, /**< Texture samples query */
+ ir_samples_identical, /**< Query whether all samples are definitely identical. */
+ };
+
+
+ /**
+ * IR instruction to sample a texture
+ *
+ * The specific form of the IR instruction depends on the \c mode value
+ * selected from \c ir_texture_opcodes. In the printed IR, these will
+ * appear as:
+ *
+ * Texel offset (0 or an expression)
+ * | Projection divisor
+ * | | Shadow comparitor
+ * | | |
+ * v v v
+ * (tex <type> <sampler> <coordinate> 0 1 ( ))
+ * (txb <type> <sampler> <coordinate> 0 1 ( ) <bias>)
+ * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
+ * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
+ * (txf <type> <sampler> <coordinate> 0 <lod>)
+ * (txf_ms
+ * <type> <sampler> <coordinate> <sample_index>)
+ * (txs <type> <sampler> <lod>)
+ * (lod <type> <sampler> <coordinate>)
+ * (tg4 <type> <sampler> <coordinate> <offset> <component>)
+ * (query_levels <type> <sampler>)
+ * (samples_identical <sampler> <coordinate>)
+ */
+ class ir_texture : public ir_rvalue {
+ public:
+ ir_texture(enum ir_texture_opcode op)
+ : ir_rvalue(ir_type_texture),
+ op(op), sampler(NULL), coordinate(NULL), projector(NULL),
+ shadow_comparitor(NULL), offset(NULL)
+ {
+ memset(&lod_info, 0, sizeof(lod_info));
+ }
+
+ virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Return a string representing the ir_texture_opcode.
+ */
+ const char *opcode_string();
+
+ /** Set the sampler and type. */
+ void set_sampler(ir_dereference *sampler, const glsl_type *type);
+
+ /**
+ * Do a reverse-lookup to translate a string into an ir_texture_opcode.
+ */
+ static ir_texture_opcode get_opcode(const char *);
+
+ enum ir_texture_opcode op;
+
+ /** Sampler to use for the texture access. */
+ ir_dereference *sampler;
+
+ /** Texture coordinate to sample */
+ ir_rvalue *coordinate;
+
+ /**
+ * Value used for projective divide.
+ *
+ * If there is no projective divide (the common case), this will be
+ * \c NULL. Optimization passes should check whether this points to a
+ * constant of 1.0 and, if so, replace it with \c NULL.
+ */
+ ir_rvalue *projector;
+
+ /**
+ * Coordinate used for comparison on shadow look-ups.
+ *
+ * If there is no shadow comparison, this will be \c NULL. For the
+ * \c ir_txf opcode, this *must* be \c NULL.
+ */
+ ir_rvalue *shadow_comparitor;
+
+ /** Texel offset. */
+ ir_rvalue *offset;
+
+ union {
+ ir_rvalue *lod; /**< Floating point LOD */
+ ir_rvalue *bias; /**< Floating point LOD bias */
+ ir_rvalue *sample_index; /**< MSAA sample index */
+ ir_rvalue *component; /**< Gather component selector */
+ struct {
+ ir_rvalue *dPdx; /**< Partial derivative of coordinate wrt X */
+ ir_rvalue *dPdy; /**< Partial derivative of coordinate wrt Y */
+ } grad;
+ } lod_info;
+ };
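+
+ /* A minimal construction sketch (assuming a ralloc context \c mem_ctx, a
+  * sampler dereference \c sampler_deref, and coordinate / LOD r-values
+  * \c coord and \c lod): which lod_info member is meaningful depends on the
+  * opcode; for ir_txl it is the explicit LOD. set_sampler() also records the
+  * type of the sampling result, vec4 here.
+  *
+  * \code
+  *    ir_texture *tex = new(mem_ctx) ir_texture(ir_txl);
+  *    tex->set_sampler(sampler_deref, glsl_type::vec4_type);
+  *    tex->coordinate = coord;
+  *    tex->lod_info.lod = lod;
+  * \endcode
+  */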
+
+
+ struct ir_swizzle_mask {
+ unsigned x:2;
+ unsigned y:2;
+ unsigned z:2;
+ unsigned w:2;
+
+ /**
+ * Number of components in the swizzle.
+ */
+ unsigned num_components:3;
+
+ /**
+ * Does the swizzle contain duplicate components?
+ *
+ * L-value swizzles cannot contain duplicate components.
+ */
+ unsigned has_duplicates:1;
+ };
+
+
+ class ir_swizzle : public ir_rvalue {
+ public:
+ ir_swizzle(ir_rvalue *, unsigned x, unsigned y, unsigned z, unsigned w,
+ unsigned count);
+
+ ir_swizzle(ir_rvalue *val, const unsigned *components, unsigned count);
+
+ ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask);
+
+ virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ /**
+ * Construct an ir_swizzle from the textual representation. Can fail.
+ */
+ static ir_swizzle *create(ir_rvalue *, const char *, unsigned vector_length);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ bool is_lvalue() const
+ {
+ return val->is_lvalue() && !mask.has_duplicates;
+ }
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const;
+
+ ir_rvalue *val;
+ ir_swizzle_mask mask;
+
+ private:
+ /**
+ * Initialize the mask component of a swizzle
+ *
+ * This is used by the \c ir_swizzle constructors.
+ */
+ void init_mask(const unsigned *components, unsigned count);
+ };
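+
+ /* A minimal usage sketch (assuming a vec4 r-value \c val): create() parses a
+  * textual swizzle and returns \c NULL when the string or the vector length
+  * is invalid, so the result must be checked.
+  *
+  * \code
+  *    ir_swizzle *swiz = ir_swizzle::create(val, "xyz", 4);
+  *    if (swiz == NULL) {
+  *       // "xyz" was not a valid swizzle of a 4-component vector
+  *    }
+  * \endcode
+  */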
+
+
+ class ir_dereference : public ir_rvalue {
+ public:
+ virtual ir_dereference *clone(void *mem_ctx, struct hash_table *) const = 0;
+
+ bool is_lvalue() const;
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const = 0;
+
+ protected:
+ ir_dereference(enum ir_node_type t)
+ : ir_rvalue(t)
+ {
+ }
+ };
+
+
+ class ir_dereference_variable : public ir_dereference {
+ public:
+ ir_dereference_variable(ir_variable *var);
+
+ virtual ir_dereference_variable *clone(void *mem_ctx,
+ struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return this->var;
+ }
+
+ virtual ir_variable *whole_variable_referenced()
+ {
+ /* ir_dereference_variable objects always dereference the entire
+ * variable. However, if this dereference is dereferenced by anything
+ * else, the complete dereference chain is not a whole-variable
+ * dereference. This method should only be called on the topmost
+ * ir_rvalue in a dereference chain.
+ */
+ return this->var;
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ /**
+ * Object being dereferenced.
+ */
+ ir_variable *var;
+ };
+
+
+ class ir_dereference_array : public ir_dereference {
+ public:
+ ir_dereference_array(ir_rvalue *value, ir_rvalue *array_index);
+
+ ir_dereference_array(ir_variable *var, ir_rvalue *array_index);
+
+ virtual ir_dereference_array *clone(void *mem_ctx,
+ struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return this->array->variable_referenced();
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *array;
+ ir_rvalue *array_index;
+
+ private:
+ void set_array(ir_rvalue *value);
+ };
+
+
+ class ir_dereference_record : public ir_dereference {
+ public:
+ ir_dereference_record(ir_rvalue *value, const char *field);
+
+ ir_dereference_record(ir_variable *var, const char *field);
+
+ virtual ir_dereference_record *clone(void *mem_ctx,
+ struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ /**
+ * Get the variable that is ultimately referenced by an r-value
+ */
+ virtual ir_variable *variable_referenced() const
+ {
+ return this->record->variable_referenced();
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ ir_rvalue *record;
+ const char *field;
+ };
+
+
+ /**
+ * Data stored in an ir_constant
+ */
+ union ir_constant_data {
+ unsigned u[16];
+ int i[16];
+ float f[16];
+ bool b[16];
+ double d[16];
+ };
+
+
+ class ir_constant : public ir_rvalue {
+ public:
+ ir_constant(const struct glsl_type *type, const ir_constant_data *data);
+ ir_constant(bool b, unsigned vector_elements=1);
+ ir_constant(unsigned int u, unsigned vector_elements=1);
+ ir_constant(int i, unsigned vector_elements=1);
+ ir_constant(float f, unsigned vector_elements=1);
+ ir_constant(double d, unsigned vector_elements=1);
+
+ /**
+ * Construct an ir_constant from a list of ir_constant values
+ */
+ ir_constant(const struct glsl_type *type, exec_list *values);
+
+ /**
+ * Construct an ir_constant from a scalar component of another ir_constant
+ *
+ * The new \c ir_constant inherits the type of the component from the
+ * source constant.
+ *
+ * \note
+ * In the case of a matrix constant, the new constant is a scalar, \b not
+ * a vector.
+ */
+ ir_constant(const ir_constant *c, unsigned i);
+
+ /**
+ * Return a new ir_constant of the specified type containing all zeros.
+ */
+ static ir_constant *zero(void *mem_ctx, const glsl_type *type);
+
+ virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const;
+
+ virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
+
+ /**
+ * Get a particular component of a constant as a specific type
+ *
+ * This is useful, for example, to get a value from an integer constant
+ * as a float or bool. This appears frequently when constructors are
+ * called with all constant parameters.
+ */
+ /*@{*/
+ bool get_bool_component(unsigned i) const;
+ float get_float_component(unsigned i) const;
+ double get_double_component(unsigned i) const;
+ int get_int_component(unsigned i) const;
+ unsigned get_uint_component(unsigned i) const;
+ /*@}*/
+
+ ir_constant *get_array_element(unsigned i) const;
+
+ ir_constant *get_record_field(const char *name);
+
+ /**
+ * Copy the values from another constant at a given offset.
+ *
+ * The offset is ignored for array or struct copies; it only applies when
+ * copying scalars or vectors into vectors or matrices.
+ *
+ * With identical types on both sides and a zero offset, this behaves
+ * like clone() without creating a new object.
+ */
+
+ void copy_offset(ir_constant *src, int offset);
+
+ /**
+ * Copy the values from another constant at a given offset,
+ * following an assign-like mask.
+ *
+ * The mask is ignored for scalars.
+ *
+ * Note that this function only handles what assign can handle,
+ * i.e. at most a vector as source and a column of a matrix as
+ * destination.
+ */
+
+ void copy_masked_offset(ir_constant *src, int offset, unsigned int mask);
+
+ /**
+ * Determine whether a constant has the same value as another constant
+ *
+ * \sa ir_constant::is_zero, ir_constant::is_one,
+ * ir_constant::is_negative_one
+ */
+ bool has_value(const ir_constant *) const;
+
+ /**
+ * Return true if this ir_constant represents the given value.
+ *
+ * For vectors, this checks that each component is the given value.
+ */
+ virtual bool is_value(float f, int i) const;
+ virtual bool is_zero() const;
+ virtual bool is_one() const;
+ virtual bool is_negative_one() const;
+
+ /**
+ * Return true for constants that could be stored as 16-bit unsigned values.
+ *
+ * Note that this will return true even for signed integer ir_constants, as
+ * long as the value is non-negative and fits in 16 bits.
+ */
+ virtual bool is_uint16_constant() const;
+
+ /**
+ * Value of the constant.
+ *
+ * The field used to back the values supplied by the constant is determined
+ * by the type associated with the \c ir_instruction. Constants may be
+ * scalars, vectors, or matrices.
+ */
+ union ir_constant_data value;
+
+ /* Array elements */
+ ir_constant **array_elements;
+
+ /* Structure fields */
+ exec_list components;
+
+ private:
+ /**
+ * Parameterless constructor only used by the clone method
+ */
+ ir_constant(void);
+ };
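+
+ /* A minimal usage sketch (assuming a ralloc context \c mem_ctx): scalar
+  * constants can be built directly, an all-zero constant of any type can be
+  * requested with zero(), and components can be read back as other types.
+  *
+  * \code
+  *    ir_constant *two = new(mem_ctx) ir_constant(2.0f);
+  *    ir_constant *zero_vec = ir_constant::zero(mem_ctx, glsl_type::vec4_type);
+  *    assert(two->get_int_component(0) == 2);
+  *    assert(zero_vec->is_zero());
+  * \endcode
+  */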
+
+ /**
+ * IR instruction to emit a vertex in a geometry shader.
+ */
+ class ir_emit_vertex : public ir_instruction {
+ public:
+ ir_emit_vertex(ir_rvalue *stream)
+ : ir_instruction(ir_type_emit_vertex),
+ stream(stream)
+ {
+ assert(stream);
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_emit_vertex *clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_emit_vertex(this->stream->clone(mem_ctx, ht));
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ int stream_id() const
+ {
+ return stream->as_constant()->value.i[0];
+ }
+
+ ir_rvalue *stream;
+ };
+
+ /**
+ * IR instruction to complete the current primitive and start a new one in a
+ * geometry shader.
+ */
+ class ir_end_primitive : public ir_instruction {
+ public:
+ ir_end_primitive(ir_rvalue *stream)
+ : ir_instruction(ir_type_end_primitive),
+ stream(stream)
+ {
+ assert(stream);
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_end_primitive *clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_end_primitive(this->stream->clone(mem_ctx, ht));
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+ int stream_id() const
+ {
+ return stream->as_constant()->value.i[0];
+ }
+
+ ir_rvalue *stream;
+ };
+
+ /**
+ * IR instruction for tessellation control and compute shader barrier.
+ */
+ class ir_barrier : public ir_instruction {
+ public:
+ ir_barrier()
+ : ir_instruction(ir_type_barrier)
+ {
+ }
+
+ virtual void accept(ir_visitor *v)
+ {
+ v->visit(this);
+ }
+
+ virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const
+ {
+ return new(mem_ctx) ir_barrier();
+ }
+
+ virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+ };
+
+ /*@}*/
+
+ /**
+ * Apply a visitor to each IR node in a list
+ */
+ void
+ visit_exec_list(exec_list *list, ir_visitor *visitor);
+
+ /**
+ * Validate invariants on each IR node in a list
+ */
+ void validate_ir_tree(exec_list *instructions);
+
+ struct _mesa_glsl_parse_state;
+ struct gl_shader_program;
+
+ /**
+ * Detect whether an unlinked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c _mesa_glsl_error will be called to emit error messages for each function
+ * that is in the recursion cycle.
+ */
+ void
+ detect_recursion_unlinked(struct _mesa_glsl_parse_state *state,
+ exec_list *instructions);
+
+ /**
+ * Detect whether a linked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c link_error_printf will be called to emit error messages for each function
+ * that is in the recursion cycle. In addition,
+ * \c gl_shader_program::LinkStatus will be set to false.
+ */
+ void
+ detect_recursion_linked(struct gl_shader_program *prog,
+ exec_list *instructions);
+
+ /**
+ * Make a clone of each IR instruction in a list
+ *
+ * \param in List of IR instructions that are to be cloned
+ * \param out List to hold the cloned instructions
+ */
+ void
+ clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in);
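+
+ /* A minimal usage sketch (assuming a ralloc context \c mem_ctx and a source
+  * instruction list \c shader_ir, both hypothetical names): the output list
+  * is filled with clones allocated under \c mem_ctx.
+  *
+  * \code
+  *    exec_list copy;
+  *    clone_ir_list(mem_ctx, &copy, &shader_ir);
+  * \endcode
+  */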
+
+ extern void
+ _mesa_glsl_initialize_variables(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+
+ extern void
+ _mesa_glsl_initialize_derived_variables(gl_shader *shader);
+
+ extern void
+ _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
+
+ extern void
+ _mesa_glsl_initialize_builtin_functions();
+
+ extern ir_function_signature *
+ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
+ const char *name, exec_list *actual_parameters);
+
+ extern ir_function *
+ _mesa_glsl_find_builtin_function_by_name(const char *name);
+
+ extern gl_shader *
+ _mesa_glsl_get_builtin_function_shader(void);
+
+ extern ir_function_signature *
+ _mesa_get_main_function_signature(gl_shader *sh);
+
+ extern void
+ _mesa_glsl_release_functions(void);
+
+ extern void
+ _mesa_glsl_release_builtin_functions(void);
+
+ extern void
+ reparent_ir(exec_list *list, void *mem_ctx);
+
+ struct glsl_symbol_table;
+
+ extern void
+ import_prototypes(const exec_list *source, exec_list *dest,
+ struct glsl_symbol_table *symbols, void *mem_ctx);
+
+ extern bool
+ ir_has_call(ir_instruction *ir);
+
+ extern void
+ do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
+ gl_shader_stage shader_stage);
+
+ extern char *
+ prototype_string(const glsl_type *return_type, const char *name,
+ exec_list *parameters);
+
+ const char *
+ mode_string(const ir_variable *var);
+
+ /**
+ * Built-in / reserved GL variables names start with "gl_"
+ */
+ static inline bool
+ is_gl_identifier(const char *s)
+ {
+ return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
+ }
+
+ extern "C" {
+ #endif /* __cplusplus */
+
+ extern void _mesa_print_ir(FILE *f, struct exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+
+ extern void
+ fprint_ir(FILE *f, const void *instruction);
+
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
+
+ unsigned
+ vertices_per_prim(GLenum prim);
+
+ #endif /* IR_H */
--- /dev/null
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include <string.h>
+ #include "main/compiler.h"
+ #include "ir.h"
+ #include "compiler/glsl_types.h"
+ #include "program/hash_table.h"
+
+ ir_rvalue *
+ ir_rvalue::clone(void *mem_ctx, struct hash_table *) const
+ {
+ /* The only possible instantiation is the generic error value. */
+ return error_value(mem_ctx);
+ }
+
+ /**
+ * Duplicate an IR variable
+ */
+ ir_variable *
+ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_variable *var = new(mem_ctx) ir_variable(this->type, this->name,
+ (ir_variable_mode) this->data.mode);
+
+ var->data.max_array_access = this->data.max_array_access;
+ if (this->is_interface_instance()) {
+ var->u.max_ifc_array_access =
+ rzalloc_array(var, unsigned, this->interface_type->length);
+ memcpy(var->u.max_ifc_array_access, this->u.max_ifc_array_access,
+ this->interface_type->length * sizeof(unsigned));
+ }
+
+ memcpy(&var->data, &this->data, sizeof(var->data));
+
+ if (this->get_state_slots()) {
+ ir_state_slot *s = var->allocate_state_slots(this->get_num_state_slots());
+ memcpy(s, this->get_state_slots(),
+ sizeof(s[0]) * var->get_num_state_slots());
+ }
+
+ if (this->constant_value)
+ var->constant_value = this->constant_value->clone(mem_ctx, ht);
+
+ if (this->constant_initializer)
+ var->constant_initializer =
+ this->constant_initializer->clone(mem_ctx, ht);
+
+ var->interface_type = this->interface_type;
+
+ if (ht) {
+ hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this));
+ }
+
+ return var;
+ }
+
+ ir_swizzle *
+ ir_swizzle::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_swizzle(this->val->clone(mem_ctx, ht), this->mask);
+ }
+
+ ir_return *
+ ir_return::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *new_value = NULL;
+
+ if (this->value)
+ new_value = this->value->clone(mem_ctx, ht);
+
+ return new(mem_ctx) ir_return(new_value);
+ }
+
+ ir_discard *
+ ir_discard::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *new_condition = NULL;
+
+ if (this->condition != NULL)
+ new_condition = this->condition->clone(mem_ctx, ht);
+
+ return new(mem_ctx) ir_discard(new_condition);
+ }
+
+ ir_loop_jump *
+ ir_loop_jump::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ (void)ht;
+
+ return new(mem_ctx) ir_loop_jump(this->mode);
+ }
+
+ ir_if *
+ ir_if::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_if *new_if = new(mem_ctx) ir_if(this->condition->clone(mem_ctx, ht));
+
+ foreach_in_list(ir_instruction, ir, &this->then_instructions) {
+ new_if->then_instructions.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ foreach_in_list(ir_instruction, ir, &this->else_instructions) {
+ new_if->else_instructions.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ return new_if;
+ }
+
+ ir_loop *
+ ir_loop::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_loop *new_loop = new(mem_ctx) ir_loop();
+
+ foreach_in_list(ir_instruction, ir, &this->body_instructions) {
+ new_loop->body_instructions.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ return new_loop;
+ }
+
+ ir_call *
+ ir_call::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_dereference_variable *new_return_ref = NULL;
+ if (this->return_deref != NULL)
+ new_return_ref = this->return_deref->clone(mem_ctx, ht);
+
+ exec_list new_parameters;
+
+ foreach_in_list(ir_instruction, ir, &this->actual_parameters) {
+ new_parameters.push_tail(ir->clone(mem_ctx, ht));
+ }
+
+ return new(mem_ctx) ir_call(this->callee, new_return_ref, &new_parameters);
+ }
+
+ ir_expression *
+ ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, };
+ unsigned int i;
+
+ for (i = 0; i < get_num_operands(); i++) {
+ op[i] = this->operands[i]->clone(mem_ctx, ht);
+ }
+
+ return new(mem_ctx) ir_expression(this->operation, this->type,
+ op[0], op[1], op[2], op[3]);
+ }
+
+ ir_dereference_variable *
+ ir_dereference_variable::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_variable *new_var;
+
+ if (ht) {
+ new_var = (ir_variable *)hash_table_find(ht, this->var);
+ if (!new_var)
+ new_var = this->var;
+ } else {
+ new_var = this->var;
+ }
+
+ return new(mem_ctx) ir_dereference_variable(new_var);
+ }
+
+ ir_dereference_array *
+ ir_dereference_array::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, ht),
+ this->array_index->clone(mem_ctx,
+ ht));
+ }
+
+ ir_dereference_record *
+ ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht),
+ this->field);
+ }
+
+ ir_texture *
+ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_texture *new_tex = new(mem_ctx) ir_texture(this->op);
+ new_tex->type = this->type;
+
+ new_tex->sampler = this->sampler->clone(mem_ctx, ht);
+ if (this->coordinate)
+ new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
+ if (this->projector)
+ new_tex->projector = this->projector->clone(mem_ctx, ht);
+ if (this->shadow_comparitor) {
+ new_tex->shadow_comparitor = this->shadow_comparitor->clone(mem_ctx, ht);
+ }
+
+ if (this->offset != NULL)
+ new_tex->offset = this->offset->clone(mem_ctx, ht);
+
+ switch (this->op) {
+ case ir_tex:
+ case ir_lod:
+ case ir_query_levels:
+ case ir_texture_samples:
+ case ir_samples_identical:
+ break;
+ case ir_txb:
+ new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
+ break;
+ case ir_txl:
+ case ir_txf:
+ case ir_txs:
+ new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
+ break;
+ case ir_txf_ms:
+ new_tex->lod_info.sample_index = this->lod_info.sample_index->clone(mem_ctx, ht);
+ break;
+ case ir_txd:
+ new_tex->lod_info.grad.dPdx = this->lod_info.grad.dPdx->clone(mem_ctx, ht);
+ new_tex->lod_info.grad.dPdy = this->lod_info.grad.dPdy->clone(mem_ctx, ht);
+ break;
+ case ir_tg4:
+ new_tex->lod_info.component = this->lod_info.component->clone(mem_ctx, ht);
+ break;
+ }
+
+ return new_tex;
+ }
+
+ ir_assignment *
+ ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_rvalue *new_condition = NULL;
+
+ if (this->condition)
+ new_condition = this->condition->clone(mem_ctx, ht);
+
+ ir_assignment *cloned =
+ new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht),
+ this->rhs->clone(mem_ctx, ht),
+ new_condition);
+ cloned->write_mask = this->write_mask;
+ return cloned;
+ }
+
+ ir_function *
+ ir_function::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_function *copy = new(mem_ctx) ir_function(this->name);
+
+ copy->is_subroutine = this->is_subroutine;
+ copy->subroutine_index = this->subroutine_index;
+ copy->num_subroutine_types = this->num_subroutine_types;
+ copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
+ for (int i = 0; i < copy->num_subroutine_types; i++)
+ copy->subroutine_types[i] = this->subroutine_types[i];
+
+ foreach_in_list(const ir_function_signature, sig, &this->signatures) {
+ ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
+ copy->add_signature(sig_copy);
+
+ if (ht != NULL)
+ hash_table_insert(ht, sig_copy,
+ (void *)const_cast<ir_function_signature *>(sig));
+ }
+
+ return copy;
+ }
+
+ ir_function_signature *
+ ir_function_signature::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_function_signature *copy = this->clone_prototype(mem_ctx, ht);
+
+ copy->is_defined = this->is_defined;
+
+ /* Clone the instruction list.
+ */
+ foreach_in_list(const ir_instruction, inst, &this->body) {
+ ir_instruction *const inst_copy = inst->clone(mem_ctx, ht);
+ copy->body.push_tail(inst_copy);
+ }
+
+ return copy;
+ }
+
+ ir_function_signature *
+ ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) const
+ {
+ ir_function_signature *copy =
+ new(mem_ctx) ir_function_signature(this->return_type);
+
+ copy->is_defined = false;
+ copy->builtin_avail = this->builtin_avail;
+ copy->origin = this;
+
+ /* Clone the parameter list, but NOT the body.
+ */
+ foreach_in_list(const ir_variable, param, &this->parameters) {
+ assert(const_cast<ir_variable *>(param)->as_variable() != NULL);
+
+ ir_variable *const param_copy = param->clone(mem_ctx, ht);
+ copy->parameters.push_tail(param_copy);
+ }
+
+ return copy;
+ }
+
+ ir_constant *
+ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
+ {
+ (void)ht;
+
+ switch (this->type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ return new(mem_ctx) ir_constant(this->type, &this->value);
+
+ case GLSL_TYPE_STRUCT: {
+ ir_constant *c = new(mem_ctx) ir_constant;
+
+ c->type = this->type;
+ for (exec_node *node = this->components.head
+ ; !node->is_tail_sentinel()
+ ; node = node->next) {
+ ir_constant *const orig = (ir_constant *) node;
+
+ c->components.push_tail(orig->clone(mem_ctx, NULL));
+ }
+
+ return c;
+ }
+
+ case GLSL_TYPE_ARRAY: {
+ ir_constant *c = new(mem_ctx) ir_constant;
+
+ c->type = this->type;
+ c->array_elements = ralloc_array(c, ir_constant *, this->type->length);
+ for (unsigned i = 0; i < this->type->length; i++) {
+ c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL);
+ }
+ return c;
+ }
+
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_INTERFACE:
+ assert(!"Should not get here.");
+ break;
+ }
+
+ return NULL;
+ }
+
+
+ class fixup_ir_call_visitor : public ir_hierarchical_visitor {
+ public:
+ fixup_ir_call_visitor(struct hash_table *ht)
+ {
+ this->ht = ht;
+ }
+
+ virtual ir_visitor_status visit_enter(ir_call *ir)
+ {
+ /* Try to find the function signature referenced by the ir_call in the
+ * table. If it is found, replace it with the value from the table.
+ */
+ ir_function_signature *sig =
+ (ir_function_signature *) hash_table_find(this->ht, ir->callee);
+ if (sig != NULL)
+ ir->callee = sig;
+
+ /* Since this may be used before function call parameters are flattened,
+ * the children also need to be processed.
+ */
+ return visit_continue;
+ }
+
+ private:
+ struct hash_table *ht;
+ };
+
+
+ static void
+ fixup_function_calls(struct hash_table *ht, exec_list *instructions)
+ {
+ fixup_ir_call_visitor v(ht);
+ v.run(instructions);
+ }
+
+
+ void
+ clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in)
+ {
+ struct hash_table *ht =
+ hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare);
+
+ foreach_in_list(const ir_instruction, original, in) {
+ ir_instruction *copy = original->clone(mem_ctx, ht);
+
+ out->push_tail(copy);
+ }
+
+ /* Make a pass over the cloned tree to fix up ir_call nodes to point to the
+ * cloned ir_function_signature nodes. This cannot be done automatically
+ * during cloning because the ir_call might be a forward reference (i.e.,
+ * the function signature that it references may not have been cloned yet).
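+ *
+ * For example, if the original list defines foo() after one of its callers,
+ * the cloned ir_call is created while still pointing at the original foo()
+ * signature; this pass replaces it with the cloned signature recorded in
+ * the hash table.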
+ */
+ fixup_function_calls(ht, out);
+
+ hash_table_dtor(ht);
+ }
--- /dev/null
- LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040,
- LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080,
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+ /**
+ * \file ir_optimization.h
+ *
+ * Prototypes for optimization passes to be called by the compiler and drivers.
+ */
+
+ /* Operations for lower_instructions() */
+ #define SUB_TO_ADD_NEG 0x01
+ #define DIV_TO_MUL_RCP 0x02
+ #define EXP_TO_EXP2 0x04
+ #define POW_TO_EXP2 0x08
+ #define LOG_TO_LOG2 0x10
+ #define MOD_TO_FLOOR 0x20
+ #define INT_DIV_TO_MUL_RCP 0x40
+ #define LDEXP_TO_ARITH 0x80
+ #define CARRY_TO_ARITH 0x100
+ #define BORROW_TO_ARITH 0x200
+ #define SAT_TO_CLAMP 0x400
+ #define DOPS_TO_DFRAC 0x800
+ #define DFREXP_DLDEXP_TO_ARITH 0x1000
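+
+ /* Illustrative use (hypothetical driver code): several lowerings can be
+ * requested at once by OR-ing flags into the mask passed to
+ * lower_instructions(), e.g.
+ *
+ * lower_instructions(ir, SUB_TO_ADD_NEG | DIV_TO_MUL_RCP | LOG_TO_LOG2);
+ */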
+
+ /**
+ * \see class lower_packing_builtins_visitor
+ */
+ enum lower_packing_builtins_op {
+ LOWER_PACK_UNPACK_NONE = 0x0000,
+
+ LOWER_PACK_SNORM_2x16 = 0x0001,
+ LOWER_UNPACK_SNORM_2x16 = 0x0002,
+
+ LOWER_PACK_UNORM_2x16 = 0x0004,
+ LOWER_UNPACK_UNORM_2x16 = 0x0008,
+
+ LOWER_PACK_HALF_2x16 = 0x0010,
+ LOWER_UNPACK_HALF_2x16 = 0x0020,
+
- LOWER_PACK_SNORM_4x8 = 0x0100,
- LOWER_UNPACK_SNORM_4x8 = 0x0200,
++ LOWER_PACK_SNORM_4x8 = 0x0040,
++ LOWER_UNPACK_SNORM_4x8 = 0x0080,
+
- LOWER_PACK_UNORM_4x8 = 0x0400,
- LOWER_UNPACK_UNORM_4x8 = 0x0800,
-
- LOWER_PACK_USE_BFI = 0x1000,
- LOWER_PACK_USE_BFE = 0x2000,
++ LOWER_PACK_UNORM_4x8 = 0x0100,
++ LOWER_UNPACK_UNORM_4x8 = 0x0200,
+
++ LOWER_PACK_USE_BFI = 0x0400,
++ LOWER_PACK_USE_BFE = 0x0800,
+ };
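+
+ /* Illustrative use (hypothetical driver code): a driver lacking native 4x8
+ * pack/unpack support might request
+ *
+ * lower_packing_builtins(ir, LOWER_PACK_SNORM_4x8 | LOWER_UNPACK_SNORM_4x8 |
+ * LOWER_PACK_UNORM_4x8 | LOWER_UNPACK_UNORM_4x8);
+ *
+ * optionally OR-ing in LOWER_PACK_USE_BFI or LOWER_PACK_USE_BFE when the
+ * hardware provides bitfield instructions.
+ */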
+
+ bool do_common_optimization(exec_list *ir, bool linked,
+ bool uniform_locations_assigned,
+ const struct gl_shader_compiler_options *options,
+ bool native_integers);
+
+ bool do_rebalance_tree(exec_list *instructions);
+ bool do_algebraic(exec_list *instructions, bool native_integers,
+ const struct gl_shader_compiler_options *options);
+ bool opt_conditional_discard(exec_list *instructions);
+ bool do_constant_folding(exec_list *instructions);
+ bool do_constant_variable(exec_list *instructions);
+ bool do_constant_variable_unlinked(exec_list *instructions);
+ bool do_copy_propagation(exec_list *instructions);
+ bool do_copy_propagation_elements(exec_list *instructions);
+ bool do_constant_propagation(exec_list *instructions);
+ void do_dead_builtin_varyings(struct gl_context *ctx,
+ gl_shader *producer, gl_shader *consumer,
+ unsigned num_tfeedback_decls,
+ class tfeedback_decl *tfeedback_decls);
+ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned);
+ bool do_dead_code_local(exec_list *instructions);
+ bool do_dead_code_unlinked(exec_list *instructions);
+ bool do_dead_functions(exec_list *instructions);
+ bool opt_flip_matrices(exec_list *instructions);
+ bool do_function_inlining(exec_list *instructions);
+ bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true,
+ bool lower_sub_return = true,
+ bool lower_main_return = false,
+ bool lower_continue = false,
+ bool lower_break = false);
+ bool do_lower_texture_projection(exec_list *instructions);
+ bool do_if_simplification(exec_list *instructions);
+ bool opt_flatten_nested_if_blocks(exec_list *instructions);
+ bool do_discard_simplification(exec_list *instructions);
+ bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
+ bool do_mat_op_to_vec(exec_list *instructions);
+ bool do_minmax_prune(exec_list *instructions);
+ bool do_noop_swizzle(exec_list *instructions);
+ bool do_structure_splitting(exec_list *instructions);
+ bool do_swizzle_swizzle(exec_list *instructions);
+ bool do_vectorize(exec_list *instructions);
+ bool do_tree_grafting(exec_list *instructions);
+ bool do_vec_index_to_cond_assign(exec_list *instructions);
+ bool do_vec_index_to_swizzle(exec_list *instructions);
+ bool lower_discard(exec_list *instructions);
+ void lower_discard_flow(exec_list *instructions);
+ bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
+ bool lower_noise(exec_list *instructions);
+ bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
+ exec_list *instructions, bool lower_input, bool lower_output,
+ bool lower_temp, bool lower_uniform);
+ bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
+ bool lower_const_arrays_to_uniforms(exec_list *instructions);
+ bool lower_clip_distance(gl_shader *shader);
+ void lower_output_reads(unsigned stage, exec_list *instructions);
+ bool lower_packing_builtins(exec_list *instructions, int op_mask);
+ void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
+ void lower_ubo_reference(struct gl_shader *shader);
+ void lower_packed_varyings(void *mem_ctx,
+ unsigned locations_used, ir_variable_mode mode,
+ unsigned gs_input_vertices, gl_shader *shader);
+ bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
+ bool lower_vector_derefs(gl_shader *shader);
+ void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
+ bool optimize_redundant_jumps(exec_list *instructions);
+ bool optimize_split_arrays(exec_list *instructions, bool linked);
+ bool lower_offset_arrays(exec_list *instructions);
+ void optimize_dead_builtin_variables(exec_list *instructions,
+ enum ir_variable_mode other);
+ bool lower_tess_level(gl_shader *shader);
+
+ bool lower_vertex_id(gl_shader *shader);
+
+ bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
+
+ ir_rvalue *
+ compare_index_block(exec_list *instructions, ir_variable *index,
+ unsigned base, unsigned components, void *mem_ctx);
--- /dev/null
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
- assert(ir->type == glsl_type::float_type);
- assert(ir->operands[0]->type == glsl_type::uint_type);
- break;
-
+ /*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file ir_validate.cpp
+ *
+ * Attempts to verify that various invariants of the IR tree are true.
+ *
+ * In particular, at the moment it makes sure that no single
+ * ir_instruction node except for ir_variable appears multiple times
+ * in the ir tree. ir_variable does appear multiple times: Once as a
+ * declaration in an exec_list, and multiple times as the endpoint of
+ * a dereference chain.
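+ *
+ * The public entry point is validate_ir_tree(), defined at the end of this
+ * file; it only performs work in DEBUG builds.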
+ */
+
+ #include "ir.h"
+ #include "ir_hierarchical_visitor.h"
+ #include "util/hash_table.h"
+ #include "util/set.h"
+ #include "compiler/glsl_types.h"
+
+ namespace {
+
+ class ir_validate : public ir_hierarchical_visitor {
+ public:
+ ir_validate()
+ {
+ this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ this->current_function = NULL;
+
+ this->callback_enter = ir_validate::validate_ir;
+ this->data_enter = ir_set;
+ }
+
+ ~ir_validate()
+ {
+ _mesa_set_destroy(this->ir_set, NULL);
+ }
+
+ virtual ir_visitor_status visit(ir_variable *v);
+ virtual ir_visitor_status visit(ir_dereference_variable *ir);
+
+ virtual ir_visitor_status visit_enter(ir_discard *ir);
+ virtual ir_visitor_status visit_enter(ir_if *ir);
+
+ virtual ir_visitor_status visit_enter(ir_function *ir);
+ virtual ir_visitor_status visit_leave(ir_function *ir);
+ virtual ir_visitor_status visit_enter(ir_function_signature *ir);
+
+ virtual ir_visitor_status visit_leave(ir_expression *ir);
+ virtual ir_visitor_status visit_leave(ir_swizzle *ir);
+
+ virtual ir_visitor_status visit_enter(class ir_dereference_array *);
+
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+ virtual ir_visitor_status visit_enter(ir_call *ir);
+
+ static void validate_ir(ir_instruction *ir, void *data);
+
+ ir_function *current_function;
+
+ struct set *ir_set;
+ };
+
+ } /* anonymous namespace */
+
+ ir_visitor_status
+ ir_validate::visit(ir_dereference_variable *ir)
+ {
+ if ((ir->var == NULL) || (ir->var->as_variable() == NULL)) {
+ printf("ir_dereference_variable @ %p does not specify a variable %p\n",
+ (void *) ir, (void *) ir->var);
+ abort();
+ }
+
+ if (_mesa_set_search(ir_set, ir->var) == NULL) {
+ printf("ir_dereference_variable @ %p specifies undeclared variable "
+ "`%s' @ %p\n",
+ (void *) ir, ir->var->name, (void *) ir->var);
+ abort();
+ }
+
+ this->validate_ir(ir, this->data_enter);
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(class ir_dereference_array *ir)
+ {
+ if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
+ !ir->array->type->is_vector()) {
+ printf("ir_dereference_array @ %p does not specify an array, a vector "
+ "or a matrix\n",
+ (void *) ir);
+ ir->print();
+ printf("\n");
+ abort();
+ }
+
+ if (!ir->array_index->type->is_scalar()) {
+ printf("ir_dereference_array @ %p does not have scalar index: %s\n",
+ (void *) ir, ir->array_index->type->name);
+ abort();
+ }
+
+ if (!ir->array_index->type->is_integer()) {
+ printf("ir_dereference_array @ %p does not have integer index: %s\n",
+ (void *) ir, ir->array_index->type->name);
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_discard *ir)
+ {
+ if (ir->condition && ir->condition->type != glsl_type::bool_type) {
+ printf("ir_discard condition %s type instead of bool.\n",
+ ir->condition->type->name);
+ ir->print();
+ printf("\n");
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_if *ir)
+ {
+ if (ir->condition->type != glsl_type::bool_type) {
+ printf("ir_if condition %s type instead of bool.\n",
+ ir->condition->type->name);
+ ir->print();
+ printf("\n");
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_function *ir)
+ {
+ /* Function definitions cannot be nested.
+ */
+ if (this->current_function != NULL) {
+ printf("Function definition nested inside another function "
+ "definition:\n");
+ printf("%s %p inside %s %p\n",
+ ir->name, (void *) ir,
+ this->current_function->name, (void *) this->current_function);
+ abort();
+ }
+
+ /* Store the current function hierarchy being traversed. This is used
+ * by the function signature visitor to ensure that the signatures are
+ * linked with the correct functions.
+ */
+ this->current_function = ir;
+
+ this->validate_ir(ir, this->data_enter);
+
+ /* Verify that all of the things stored in the list of signatures are,
+ * in fact, function signatures.
+ */
+ foreach_in_list(ir_instruction, sig, &ir->signatures) {
+ if (sig->ir_type != ir_type_function_signature) {
+ printf("Non-signature in signature list of function `%s'\n",
+ ir->name);
+ abort();
+ }
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_leave(ir_function *ir)
+ {
+ assert(ralloc_parent(ir->name) == ir);
+
+ this->current_function = NULL;
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_function_signature *ir)
+ {
+ if (this->current_function != ir->function()) {
+ printf("Function signature nested inside wrong function "
+ "definition:\n");
+ printf("%p inside %s %p instead of %s %p\n",
+ (void *) ir,
+ this->current_function->name, (void *) this->current_function,
+ ir->function_name(), (void *) ir->function());
+ abort();
+ }
+
+ if (ir->return_type == NULL) {
+ printf("Function signature %p for function %s has NULL return type.\n",
+ (void *) ir, ir->function_name());
+ abort();
+ }
+
+ this->validate_ir(ir, this->data_enter);
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_leave(ir_expression *ir)
+ {
+ switch (ir->operation) {
+ case ir_unop_bit_not:
+ assert(ir->operands[0]->type == ir->type);
+ break;
+ case ir_unop_logic_not:
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ break;
+
+ case ir_unop_neg:
+ case ir_unop_abs:
+ case ir_unop_sign:
+ case ir_unop_rcp:
+ case ir_unop_rsq:
+ case ir_unop_sqrt:
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_unop_exp:
+ case ir_unop_log:
+ case ir_unop_exp2:
+ case ir_unop_log2:
+ case ir_unop_saturate:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_unop_f2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_f2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+ case ir_unop_i2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_f2b:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ break;
+ case ir_unop_b2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_i2b:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ break;
+ case ir_unop_b2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_u2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_i2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+ case ir_unop_u2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_bitcast_i2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_bitcast_f2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_bitcast_u2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_bitcast_f2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+
+ case ir_unop_trunc:
+ case ir_unop_round_even:
+ case ir_unop_ceil:
+ case ir_unop_floor:
+ case ir_unop_fract:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type == ir->type);
+ break;
+ case ir_unop_sin:
+ case ir_unop_cos:
+ case ir_unop_dFdx:
+ case ir_unop_dFdx_coarse:
+ case ir_unop_dFdx_fine:
+ case ir_unop_dFdy:
+ case ir_unop_dFdy_coarse:
+ case ir_unop_dFdy_fine:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->operands[0]->type == ir->type);
+ break;
+
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::vec2_type);
+ break;
+
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_4x8:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::vec4_type);
+ break;
+
+ case ir_unop_pack_double_2x32:
+ assert(ir->type == glsl_type::double_type);
+ assert(ir->operands[0]->type == glsl_type::uvec2_type);
+ break;
+
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ assert(ir->type == glsl_type::vec2_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ assert(ir->type == glsl_type::vec4_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
- case ir_binop_pack_half_2x16_split:
- assert(ir->type == glsl_type::uint_type);
- assert(ir->operands[0]->type == glsl_type::float_type);
- assert(ir->operands[1]->type == glsl_type::float_type);
- break;
-
+ case ir_unop_unpack_double_2x32:
+ assert(ir->type == glsl_type::uvec2_type);
+ assert(ir->operands[0]->type == glsl_type::double_type);
+ break;
+
+ case ir_unop_bitfield_reverse:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->type->is_integer());
+ break;
+
+ case ir_unop_bit_count:
+ case ir_unop_find_msb:
+ case ir_unop_find_lsb:
+ assert(ir->operands[0]->type->vector_elements == ir->type->vector_elements);
+ assert(ir->operands[0]->type->is_integer());
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+
+ case ir_unop_noise:
+ /* XXX what can we assert here? */
+ break;
+
+ case ir_unop_interpolate_at_centroid:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float());
+ break;
+
+ case ir_unop_get_buffer_size:
+ assert(ir->type == glsl_type::int_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_ssbo_unsized_array_length:
+ assert(ir->type == glsl_type::int_type);
+ assert(ir->operands[0]->type->is_array());
+ assert(ir->operands[0]->type->is_unsized_array());
+ break;
+
+ case ir_unop_d2f:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ break;
+ case ir_unop_f2d:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_d2i:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_i2d:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_d2u:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+ case ir_unop_u2d:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_d2b:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ break;
+
+ case ir_unop_frexp_sig:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+ break;
+ case ir_unop_frexp_exp:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_unop_subroutine_to_int:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE);
+ assert(ir->type->base_type == GLSL_TYPE_INT);
+ break;
+ case ir_binop_add:
+ case ir_binop_sub:
+ case ir_binop_mul:
+ case ir_binop_div:
+ case ir_binop_mod:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_pow:
+ assert(ir->operands[0]->type->base_type ==
+ ir->operands[1]->type->base_type);
+
+ if (ir->operands[0]->type->is_scalar())
+ assert(ir->operands[1]->type == ir->type);
+ else if (ir->operands[1]->type->is_scalar())
+ assert(ir->operands[0]->type == ir->type);
+ else if (ir->operands[0]->type->is_vector() &&
+ ir->operands[1]->type->is_vector()) {
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[0]->type == ir->type);
+ }
+ break;
+
+ case ir_binop_imul_high:
+ assert(ir->type == ir->operands[0]->type);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type->is_integer());
+ break;
+
+ case ir_binop_carry:
+ case ir_binop_borrow:
+ assert(ir->type == ir->operands[0]->type);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type->base_type == GLSL_TYPE_UINT);
+ break;
+
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ /* The semantics of the IR operators differ from the GLSL <, >, <=, >=,
+ * ==, and != operators. The IR operators perform a component-wise
+ * comparison on scalar or vector types and return a boolean scalar or
+ * vector type of the same size.
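+ * For example, ir_binop_less applied to two vec3 operands yields a bvec3.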
+ */
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[0]->type->is_vector()
+ || ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->vector_elements
+ == ir->type->vector_elements);
+ break;
+
+ case ir_binop_all_equal:
+ case ir_binop_any_nequal:
+ /* GLSL == and != operate on scalars, vectors, matrices and arrays, and
+ * return a scalar boolean. The IR matches that.
+ */
+ assert(ir->type == glsl_type::bool_type);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ break;
+
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ assert(ir->operands[0]->type->is_integer() &&
+ ir->operands[1]->type->is_integer());
+ if (ir->operands[0]->type->is_scalar()) {
+ assert(ir->operands[1]->type->is_scalar());
+ }
+ if (ir->operands[0]->type->is_vector() &&
+ ir->operands[1]->type->is_vector()) {
+ assert(ir->operands[0]->type->components() ==
+ ir->operands[1]->type->components());
+ }
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_binop_bit_and:
+ case ir_binop_bit_xor:
+ case ir_binop_bit_or:
+ assert(ir->operands[0]->type->base_type ==
+ ir->operands[1]->type->base_type);
+ assert(ir->type->is_integer());
+ if (ir->operands[0]->type->is_vector() &&
+ ir->operands[1]->type->is_vector()) {
+ assert(ir->operands[0]->type->vector_elements ==
+ ir->operands[1]->type->vector_elements);
+ }
+ break;
+
+ case ir_binop_logic_and:
+ case ir_binop_logic_xor:
+ case ir_binop_logic_or:
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL);
+ break;
+
+ case ir_binop_dot:
+ assert(ir->type == glsl_type::float_type ||
+ ir->type == glsl_type::double_type);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ break;
+
+ case ir_binop_ubo_load:
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+
+ assert(ir->operands[1]->type == glsl_type::uint_type);
+ break;
+
+ case ir_binop_ldexp:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float() ||
+ ir->operands[0]->type->is_double());
+ assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
+ assert(ir->operands[0]->type->components() ==
+ ir->operands[1]->type->components());
+ break;
+
+ case ir_binop_vector_extract:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[1]->type->is_scalar()
+ && ir->operands[1]->type->is_integer());
+ break;
+
+ case ir_binop_interpolate_at_offset:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float());
+ assert(ir->operands[1]->type->components() == 2);
+ assert(ir->operands[1]->type->is_float());
+ break;
+
+ case ir_binop_interpolate_at_sample:
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[0]->type->is_float());
+ assert(ir->operands[1]->type == glsl_type::int_type);
+ break;
+
+ case ir_triop_fma:
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->type == ir->operands[0]->type);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type == ir->operands[2]->type);
+ break;
+
+ case ir_triop_lrp:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[2]->type == ir->operands[0]->type ||
+ ir->operands[2]->type == glsl_type::float_type ||
+ ir->operands[2]->type == glsl_type::double_type);
+ break;
+
+ case ir_triop_csel:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->vector_elements == ir->operands[0]->type->vector_elements);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type == ir->operands[2]->type);
+ break;
+
+ case ir_triop_bitfield_extract:
+ assert(ir->type->is_integer());
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[1]->type == ir->type);
+ assert(ir->operands[2]->type == ir->type);
+ break;
+
+ case ir_triop_vector_insert:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type);
+ assert(ir->operands[2]->type->is_scalar()
+ && ir->operands[2]->type->is_integer());
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_quadop_bitfield_insert:
+ assert(ir->type->is_integer());
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[1]->type == ir->type);
+ assert(ir->operands[2]->type == ir->type);
+ assert(ir->operands[3]->type == ir->type);
+ break;
+
+ case ir_quadop_vector:
+ /* The vector operator collects some number of scalars and generates a
+ * vector from them.
+ *
+ * - All of the operands must be scalar.
+ * - Number of operands must match the size of the resulting vector.
+ * - Base type of the operands must match the base type of the result.
+ */
+ assert(ir->type->is_vector());
+ switch (ir->type->vector_elements) {
+ case 2:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2] == NULL);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 3:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 4:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3]->type->is_scalar());
+ assert(ir->operands[3]->type->base_type == ir->type->base_type);
+ break;
+ default:
+ /* The is_vector assertion above should prevent execution from ever
+ * getting here.
+ */
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_leave(ir_swizzle *ir)
+ {
+ unsigned int chans[4] = {ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w};
+
+ for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+ if (chans[i] >= ir->val->type->vector_elements) {
+ printf("ir_swizzle @ %p specifies a channel not present "
+ "in the value.\n", (void *) ir);
+ ir->print();
+ abort();
+ }
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit(ir_variable *ir)
+ {
+ /* An ir_variable is the one thing that can (and will) appear multiple times
+ * in an IR tree. It is added to the hashtable so that it can be used
+ * in the ir_dereference_variable handler to ensure that a variable is
+ * declared before it is dereferenced.
+ */
+ if (ir->name && ir->is_name_ralloced())
+ assert(ralloc_parent(ir->name) == ir);
+
+ _mesa_set_add(ir_set, ir);
+
+ /* If a variable is an array, verify that the maximum array index is in
+ * bounds. There was once an error in AST-to-HIR conversion that set this
+ * to be out of bounds.
+ */
+ if (ir->type->array_size() > 0) {
+ if (ir->data.max_array_access >= ir->type->length) {
+ printf("ir_variable has maximum access out of bounds (%d vs %d)\n",
+ ir->data.max_array_access, ir->type->length - 1);
+ ir->print();
+ abort();
+ }
+ }
+
+ /* If a variable is an interface block (or an array of interface blocks),
+ * verify that the maximum array index for each interface member is in
+ * bounds.
+ */
+ if (ir->is_interface_instance()) {
+ const glsl_struct_field *fields =
+ ir->get_interface_type()->fields.structure;
+ for (unsigned i = 0; i < ir->get_interface_type()->length; i++) {
+ if (fields[i].type->array_size() > 0) {
+ const unsigned *const max_ifc_array_access =
+ ir->get_max_ifc_array_access();
+
+ assert(max_ifc_array_access != NULL);
+
+ if (max_ifc_array_access[i] >= fields[i].type->length) {
+ printf("ir_variable has maximum access out of bounds for "
+ "field %s (%d vs %d)\n", fields[i].name,
+ max_ifc_array_access[i], fields[i].type->length);
+ ir->print();
+ abort();
+ }
+ }
+ }
+ }
+
+ if (ir->constant_initializer != NULL && !ir->data.has_initializer) {
+ printf("ir_variable didn't have an initializer, but has a constant "
+ "initializer value.\n");
+ ir->print();
+ abort();
+ }
+
+ if (ir->data.mode == ir_var_uniform
+ && is_gl_identifier(ir->name)
+ && ir->get_state_slots() == NULL) {
+ printf("built-in uniform has no state\n");
+ ir->print();
+ abort();
+ }
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_assignment *ir)
+ {
+ const ir_dereference *const lhs = ir->lhs;
+ if (lhs->type->is_scalar() || lhs->type->is_vector()) {
+ if (ir->write_mask == 0) {
+ printf("Assignment LHS is %s, but write mask is 0:\n",
+ lhs->type->is_scalar() ? "scalar" : "vector");
+ ir->print();
+ abort();
+ }
+
+ int lhs_components = 0;
+ for (int i = 0; i < 4; i++) {
+ if (ir->write_mask & (1 << i))
+ lhs_components++;
+ }
+
+ if (lhs_components != ir->rhs->type->vector_elements) {
+ printf("Assignment count of LHS write mask channels enabled not\n"
+ "matching RHS vector size (%d LHS, %d RHS).\n",
+ lhs_components, ir->rhs->type->vector_elements);
+ ir->print();
+ abort();
+ }
+ }
+
+ this->validate_ir(ir, this->data_enter);
+
+ return visit_continue;
+ }
+
+ ir_visitor_status
+ ir_validate::visit_enter(ir_call *ir)
+ {
+ ir_function_signature *const callee = ir->callee;
+
+ if (callee->ir_type != ir_type_function_signature) {
+ printf("IR called by ir_call is not ir_function_signature!\n");
+ abort();
+ }
+
+ if (ir->return_deref) {
+ if (ir->return_deref->type != callee->return_type) {
+ printf("callee type %s does not match return storage type %s\n",
+ callee->return_type->name, ir->return_deref->type->name);
+ abort();
+ }
+ } else if (callee->return_type != glsl_type::void_type) {
+ printf("ir_call has non-void callee but no return storage\n");
+ abort();
+ }
+
+ const exec_node *formal_param_node = callee->parameters.head;
+ const exec_node *actual_param_node = ir->actual_parameters.head;
+ while (true) {
+ if (formal_param_node->is_tail_sentinel()
+ != actual_param_node->is_tail_sentinel()) {
+ printf("ir_call has the wrong number of parameters:\n");
+ goto dump_ir;
+ }
+ if (formal_param_node->is_tail_sentinel()) {
+ break;
+ }
+ const ir_variable *formal_param
+ = (const ir_variable *) formal_param_node;
+ const ir_rvalue *actual_param
+ = (const ir_rvalue *) actual_param_node;
+ if (formal_param->type != actual_param->type) {
+ printf("ir_call parameter type mismatch:\n");
+ goto dump_ir;
+ }
+ if (formal_param->data.mode == ir_var_function_out
+ || formal_param->data.mode == ir_var_function_inout) {
+ if (!actual_param->is_lvalue()) {
+ printf("ir_call out/inout parameters must be lvalues:\n");
+ goto dump_ir;
+ }
+ }
+ formal_param_node = formal_param_node->next;
+ actual_param_node = actual_param_node->next;
+ }
+
+ return visit_continue;
+
+ dump_ir:
+ ir->print();
+ printf("callee:\n");
+ callee->print();
+ abort();
+ return visit_stop;
+ }
+
+ void
+ ir_validate::validate_ir(ir_instruction *ir, void *data)
+ {
+ struct set *ir_set = (struct set *) data;
+
+ if (_mesa_set_search(ir_set, ir)) {
+ printf("Instruction node present twice in ir tree:\n");
+ ir->print();
+ printf("\n");
+ abort();
+ }
+ _mesa_set_add(ir_set, ir);
+ }
+
+ void
+ check_node_type(ir_instruction *ir, void *data)
+ {
+ (void) data;
+
+ if (ir->ir_type >= ir_type_max) {
+ printf("Instruction node with unset type\n");
+ ir->print(); printf("\n");
+ }
+ ir_rvalue *value = ir->as_rvalue();
+ if (value != NULL)
+ assert(value->type != glsl_type::error_type);
+ }
+
+ void
+ validate_ir_tree(exec_list *instructions)
+ {
+ /* We shouldn't have any reason to validate IR in a release build,
+ * and it's half composed of assert()s anyway which wouldn't do
+ * anything.
+ */
+ #ifdef DEBUG
+ ir_validate v;
+
+ v.run(instructions);
+
+ foreach_in_list(ir_instruction, ir, instructions) {
+ visit_tree(ir, check_node_type, NULL);
+ }
+ #endif
+ }
--- /dev/null
+ /*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "main/core.h"
+ #include "ir.h"
+ #include "linker.h"
+ #include "ir_uniform.h"
+
+ /* These functions are put in a "private" namespace instead of being marked
+ * static so that the unit tests can access them. See
+ * http://code.google.com/p/googletest/wiki/AdvancedGuide#Testing_Private_Code
+ */
+ namespace linker {
+
+ gl_uniform_storage *
+ get_storage(gl_uniform_storage *storage, unsigned num_storage,
+ const char *name)
+ {
+ for (unsigned int i = 0; i < num_storage; i++) {
+ if (strcmp(name, storage[i].name) == 0)
+ return &storage[i];
+ }
+
+ return NULL;
+ }
+
+ static unsigned
+ get_uniform_block_index(const gl_shader_program *shProg,
+ const char *uniformBlockName)
+ {
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName))
+ return i;
+ }
+
+ return GL_INVALID_INDEX;
+ }
+
+ void
+ copy_constant_to_storage(union gl_constant_value *storage,
+ const ir_constant *val,
+ const enum glsl_base_type base_type,
+ const unsigned int elements,
+ unsigned int boolean_true)
+ {
+ for (unsigned int i = 0; i < elements; i++) {
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ storage[i].u = val->value.u[i];
+ break;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_SAMPLER:
+ storage[i].i = val->value.i[i];
+ break;
+ case GLSL_TYPE_FLOAT:
+ storage[i].f = val->value.f[i];
+ break;
+ case GLSL_TYPE_DOUBLE:
+ /* XXX need to check on big-endian */
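+ /* Each double occupies two consecutive 32-bit slots; on a little-endian
+ * host the low word is stored first.
+ */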
+ storage[i * 2].u = *(uint32_t *)&val->value.d[i];
+ storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1);
+ break;
+ case GLSL_TYPE_BOOL:
+ storage[i].b = val->value.b[i] ? boolean_true : 0;
+ break;
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_INTERFACE:
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_ERROR:
+ /* All other types should have already been filtered by other
+ * paths in the caller.
+ */
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+ }
+
+ /**
+ * Initialize an opaque uniform from the value of an explicit binding
+ * qualifier specified in the shader. Atomic counters are different because
+ * they have no storage and should be handled elsewhere.
+ */
+ void
+ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
+ const glsl_type *type, const char *name, int *binding)
+ {
+
+ if (type->is_array() && type->fields.array->is_array()) {
+ const glsl_type *const element_type = type->fields.array;
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
+
+ set_opaque_binding(mem_ctx, prog, element_type,
+ element_name, binding);
+ }
+ } else {
+ struct gl_uniform_storage *const storage =
+ get_storage(prog->UniformStorage, prog->NumUniformStorage, name);
+
+ if (storage == NULL) {
+ assert(storage != NULL);
+ return;
+ }
+
+ const unsigned elements = MAX2(storage->array_elements, 1);
+
+ /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec
+ * says:
+ *
+ * "If the binding identifier is used with an array, the first element
+ * of the array takes the specified unit and each subsequent element
+ * takes the next consecutive unit."
+ */
+ for (unsigned int i = 0; i < elements; i++) {
+ storage->storage[i].i = (*binding)++;
+ }
+
+ for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
+ gl_shader *shader = prog->_LinkedShaders[sh];
+
+ if (shader) {
+ if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
+ storage->opaque[sh].active) {
+ for (unsigned i = 0; i < elements; i++) {
+ const unsigned index = storage->opaque[sh].index + i;
+ shader->SamplerUnits[index] = storage->storage[i].i;
+ }
+
+ } else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
+ storage->opaque[sh].active) {
+ for (unsigned i = 0; i < elements; i++) {
+ const unsigned index = storage->opaque[sh].index + i;
+ shader->ImageUnits[index] = storage->storage[i].i;
+ }
+ }
+ }
+ }
+
+ storage->initialized = true;
+ }
+ }
+
+ void
+ set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
+ {
+ const unsigned block_index = get_uniform_block_index(prog, block_name);
+
+ if (block_index == GL_INVALID_INDEX) {
+ assert(block_index != GL_INVALID_INDEX);
+ return;
+ }
+
+ /* This is a field of a UBO. val is the binding index. */
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ int stage_index = prog->InterfaceBlockStageIndex[i][block_index];
+
+ if (stage_index != -1) {
+ struct gl_shader *sh = prog->_LinkedShaders[i];
+ sh->BufferInterfaceBlocks[stage_index].Binding = binding;
+ }
+ }
+ }
+
+ void
+ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
+ const char *name, const glsl_type *type,
+ ir_constant *val, unsigned int boolean_true)
+ {
+ const glsl_type *t_without_array = type->without_array();
+ if (type->is_record()) {
+ ir_constant *field_constant;
+
+ field_constant = (ir_constant *)val->components.get_head();
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const glsl_type *field_type = type->fields.structure[i].type;
+ const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+ type->fields.structure[i].name);
+ set_uniform_initializer(mem_ctx, prog, field_name,
+ field_type, field_constant, boolean_true);
+ field_constant = (ir_constant *)field_constant->next;
+ }
+ return;
+ } else if (t_without_array->is_record() ||
+ (type->is_array() && type->fields.array->is_array())) {
+ const glsl_type *const element_type = type->fields.array;
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
+
+ set_uniform_initializer(mem_ctx, prog, element_name,
+ element_type, val->array_elements[i],
+ boolean_true);
+ }
+ return;
+ }
+
+ struct gl_uniform_storage *const storage =
+ get_storage(prog->UniformStorage,
+ prog->NumUniformStorage,
+ name);
+ if (storage == NULL) {
+ assert(storage != NULL);
+ return;
+ }
+
+ if (val->type->is_array()) {
+ const enum glsl_base_type base_type =
+ val->array_elements[0]->type->base_type;
+ const unsigned int elements = val->array_elements[0]->type->components();
+ unsigned int idx = 0;
+ unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1;
+
+ assert(val->type->length >= storage->array_elements);
+ for (unsigned int i = 0; i < storage->array_elements; i++) {
+ copy_constant_to_storage(& storage->storage[idx],
+ val->array_elements[i],
+ base_type,
+ elements,
+ boolean_true);
+
+ idx += elements * dmul;
+ }
+ } else {
+ copy_constant_to_storage(storage->storage,
+ val,
+ val->type->base_type,
+ val->type->components(),
+ boolean_true);
+
+ if (storage->type->is_sampler()) {
+ for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
+ gl_shader *shader = prog->_LinkedShaders[sh];
+
+ if (shader && storage->opaque[sh].active) {
+ unsigned index = storage->opaque[sh].index;
+
+ shader->SamplerUnits[index] = storage->storage[0].i;
+ }
+ }
+ }
+ }
+
+ storage->initialized = true;
+ }
+ }
+
+ void
+ link_set_uniform_initializers(struct gl_shader_program *prog,
+ unsigned int boolean_true)
+ {
+ void *mem_ctx = NULL;
+
+ for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_shader *shader = prog->_LinkedShaders[i];
+
+ if (shader == NULL)
+ continue;
+
+ foreach_in_list(ir_instruction, node, shader->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if (!var || (var->data.mode != ir_var_uniform &&
+ var->data.mode != ir_var_shader_storage))
+ continue;
+
+ if (!mem_ctx)
+ mem_ctx = ralloc_context(NULL);
+
+ if (var->data.explicit_binding) {
+ const glsl_type *const type = var->type;
+
+ if (type->without_array()->is_sampler() ||
+ type->without_array()->is_image()) {
+ int binding = var->data.binding;
+ linker::set_opaque_binding(mem_ctx, prog, var->type,
+ var->name, &binding);
+ } else if (var->is_in_buffer_block()) {
+ const glsl_type *const iface_type = var->get_interface_type();
+
+ /* If the variable is an array and it is an interface instance,
+ * we need to set the binding for each array element. Just
+ * checking that the variable is an array is not sufficient.
+ * The variable could be an array element of a uniform block
+ * that lacks an instance name. For example:
+ *
+ * uniform U {
+ * float f[4];
+ * };
+ *
+ * In this case "f" would pass is_in_buffer_block (above) and
+ * type->is_array(), but it will fail is_interface_instance().
+ */
+ if (var->is_interface_instance() && var->type->is_array()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ const char *name =
+ ralloc_asprintf(mem_ctx, "%s[%u]", iface_type->name, i);
+
+ /* Section 4.4.3 (Uniform Block Layout Qualifiers) of the
+ * GLSL 4.20 spec says:
+ *
+ * "If the binding identifier is used with a uniform
+ * block instanced as an array then the first element
+ * of the array takes the specified block binding and
+ * each subsequent element takes the next consecutive
+ * uniform block binding point."
+ */
+ linker::set_block_binding(prog, name,
+ var->data.binding + i);
+ }
+ } else {
+ linker::set_block_binding(prog, iface_type->name,
+ var->data.binding);
+ }
+ } else if (type->contains_atomic()) {
+ /* we don't actually need to do anything. */
+ } else {
+ assert(!"Explicit binding not on a sampler, UBO or atomic.");
+ }
+ } else if (var->constant_initializer) {
+ linker::set_uniform_initializer(mem_ctx, prog, var->name,
+ var->type, var->constant_initializer,
+ boolean_true);
+ }
+ }
+ }
+
+ ralloc_free(mem_ctx);
+ }
--- /dev/null
- /* Mutually exclusive options. */
- assert(!((op_mask & LOWER_PACK_HALF_2x16) &&
- (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT)));
-
- assert(!((op_mask & LOWER_UNPACK_HALF_2x16) &&
- (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT)));
-
+ /*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "ir.h"
+ #include "ir_builder.h"
+ #include "ir_optimization.h"
+ #include "ir_rvalue_visitor.h"
+
+ namespace {
+
+ using namespace ir_builder;
+
+ /**
+ * A visitor that lowers built-in floating-point pack/unpack expressions
+ * such as packSnorm2x16.
+ */
+ class lower_packing_builtins_visitor : public ir_rvalue_visitor {
+ public:
+ /**
+ * \param op_mask is a bitmask of `enum lower_packing_builtins_op`
+ */
+ explicit lower_packing_builtins_visitor(int op_mask)
+ : op_mask(op_mask),
+ progress(false)
+ {
- case LOWER_PACK_HALF_2x16_TO_SPLIT:
- *rvalue = split_pack_half_2x16(op0);
- break;
+ factory.instructions = &factory_instructions;
+ }
+
+ virtual ~lower_packing_builtins_visitor()
+ {
+ assert(factory_instructions.is_empty());
+ }
+
+ bool get_progress() { return progress; }
+
+ void handle_rvalue(ir_rvalue **rvalue)
+ {
+ if (!*rvalue)
+ return;
+
+ ir_expression *expr = (*rvalue)->as_expression();
+ if (!expr)
+ return;
+
+ enum lower_packing_builtins_op lowering_op =
+ choose_lowering_op(expr->operation);
+
+ if (lowering_op == LOWER_PACK_UNPACK_NONE)
+ return;
+
+ setup_factory(ralloc_parent(expr));
+
+ ir_rvalue *op0 = expr->operands[0];
+ ralloc_steal(factory.mem_ctx, op0);
+
+ switch (lowering_op) {
+ case LOWER_PACK_SNORM_2x16:
+ *rvalue = lower_pack_snorm_2x16(op0);
+ break;
+ case LOWER_PACK_SNORM_4x8:
+ *rvalue = lower_pack_snorm_4x8(op0);
+ break;
+ case LOWER_PACK_UNORM_2x16:
+ *rvalue = lower_pack_unorm_2x16(op0);
+ break;
+ case LOWER_PACK_UNORM_4x8:
+ *rvalue = lower_pack_unorm_4x8(op0);
+ break;
+ case LOWER_PACK_HALF_2x16:
+ *rvalue = lower_pack_half_2x16(op0);
+ break;
- case LOWER_UNPACK_HALF_2x16_TO_SPLIT:
- *rvalue = split_unpack_half_2x16(op0);
- break;
+ case LOWER_UNPACK_SNORM_2x16:
+ *rvalue = lower_unpack_snorm_2x16(op0);
+ break;
+ case LOWER_UNPACK_SNORM_4x8:
+ *rvalue = lower_unpack_snorm_4x8(op0);
+ break;
+ case LOWER_UNPACK_UNORM_2x16:
+ *rvalue = lower_unpack_unorm_2x16(op0);
+ break;
+ case LOWER_UNPACK_UNORM_4x8:
+ *rvalue = lower_unpack_unorm_4x8(op0);
+ break;
+ case LOWER_UNPACK_HALF_2x16:
+ *rvalue = lower_unpack_half_2x16(op0);
+ break;
- result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
+ case LOWER_PACK_UNPACK_NONE:
+ case LOWER_PACK_USE_BFI:
+ case LOWER_PACK_USE_BFE:
+ assert(!"not reached");
+ break;
+ }
+
+ teardown_factory();
+ progress = true;
+ }
+
+ private:
+ const int op_mask;
+ bool progress;
+ ir_factory factory;
+ exec_list factory_instructions;
+
+ /**
+ * Determine the needed lowering operation by filtering \a expr_op
+ * through \ref op_mask.
+ */
+ enum lower_packing_builtins_op
+ choose_lowering_op(ir_expression_operation expr_op)
+ {
+ /* C++ regards int and enum as fundamentally different types.
+ * So, we can't simply return from each case; we must cast the return
+ * value.
+ */
+ int result;
+
+ switch (expr_op) {
+ case ir_unop_pack_snorm_2x16:
+ result = op_mask & LOWER_PACK_SNORM_2x16;
+ break;
+ case ir_unop_pack_snorm_4x8:
+ result = op_mask & LOWER_PACK_SNORM_4x8;
+ break;
+ case ir_unop_pack_unorm_2x16:
+ result = op_mask & LOWER_PACK_UNORM_2x16;
+ break;
+ case ir_unop_pack_unorm_4x8:
+ result = op_mask & LOWER_PACK_UNORM_4x8;
+ break;
+ case ir_unop_pack_half_2x16:
- result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
++ result = op_mask & LOWER_PACK_HALF_2x16;
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ result = op_mask & LOWER_UNPACK_SNORM_2x16;
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ result = op_mask & LOWER_UNPACK_SNORM_4x8;
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ result = op_mask & LOWER_UNPACK_UNORM_2x16;
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ result = op_mask & LOWER_UNPACK_UNORM_4x8;
+ break;
+ case ir_unop_unpack_half_2x16:
- result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
++ result = op_mask & LOWER_UNPACK_HALF_2x16;
+ break;
+ default:
+ result = LOWER_PACK_UNPACK_NONE;
+ break;
+ }
+
+ return static_cast<enum lower_packing_builtins_op>(result);
+ }
+
+ void
+ setup_factory(void *mem_ctx)
+ {
+ assert(factory.mem_ctx == NULL);
+ assert(factory.instructions->is_empty());
+
+ factory.mem_ctx = mem_ctx;
+ }
+
+ void
+ teardown_factory()
+ {
+ base_ir->insert_before(factory.instructions);
+ assert(factory.instructions->is_empty());
+ factory.mem_ctx = NULL;
+ }
+
+ template <typename T>
+ ir_constant*
+ constant(T x)
+ {
+ return factory.constant(x);
+ }
+
+ /**
+ * \brief Pack two uint16's into a single uint32.
+ *
+ * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32
+ * where the least significant bits specify the first element of the pair.
+ * Return the uint32.
+ */
+ ir_rvalue*
+ pack_uvec2_to_uint(ir_rvalue *uvec2_rval)
+ {
+ assert(uvec2_rval->type == glsl_type::uvec2_type);
+
+ /* uvec2 u = UVEC2_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_uvec2_to_uint");
+ factory.emit(assign(u, uvec2_rval));
+
+ if (op_mask & LOWER_PACK_USE_BFI) {
+ return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
+ swizzle_y(u),
+ constant(16u),
+ constant(16u));
+ }
+
+ /* return (u.y << 16) | (u.x & 0xffff); */
+ return bit_or(lshift(swizzle_y(u), constant(16u)),
+ bit_and(swizzle_x(u), constant(0xffffu)));
+ }
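+
+ /* Illustrative example: pack_uvec2_to_uint(uvec2(0x1234u, 0xABCDu)) yields
+ * (0xABCDu << 16u) | 0x1234u == 0xABCD1234u; the bitfield_insert variant
+ * above computes the same value.
+ */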
+
+ /**
+ * \brief Pack four uint8's into a single uint32.
+ *
+ * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a
+ * uint32 where the least significant bits specify the first element of the
+ * 4-tuple. Return the uint32.
+ */
+ ir_rvalue*
+ pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
+ {
+ assert(uvec4_rval->type == glsl_type::uvec4_type);
+
+ ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
+ "tmp_pack_uvec4_to_uint");
+
+ if (op_mask & LOWER_PACK_USE_BFI) {
+ /* uvec4 u = UVEC4_RVAL; */
+ factory.emit(assign(u, uvec4_rval));
+
+ return bitfield_insert(bitfield_insert(
+ bitfield_insert(
+ bit_and(swizzle_x(u), constant(0xffu)),
+ swizzle_y(u), constant(8u), constant(8u)),
+ swizzle_z(u), constant(16u), constant(8u)),
+ swizzle_w(u), constant(24u), constant(8u));
+ }
+
+ /* uvec4 u = UVEC4_RVAL & 0xff */
+ factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
+
+ /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
+ return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)),
+ lshift(swizzle_z(u), constant(16u))),
+ bit_or(lshift(swizzle_y(u), constant(8u)),
+ swizzle_x(u)));
+ }
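+
+ /* Illustrative example: pack_uvec4_to_uint(uvec4(0x12u, 0x34u, 0x56u,
+ * 0x78u)) yields 0x78563412u, with the first component in the least
+ * significant byte.
+ */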
+
+ /**
+ * \brief Unpack a uint32 into two uint16's.
+ *
+ * Interpret the given uint32 as a uint16 pair where the uint32's least
+ * significant bits specify the pair's first element. Return the uint16
+ * pair as a uvec2.
+ */
+ ir_rvalue*
+ unpack_uint_to_uvec2(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = UINT_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_uint_to_uvec2_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* uvec2 u2; */
+ ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_uint_to_uvec2_u2");
+
+ /* u2.x = u & 0xffffu; */
+ factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X));
+
+ /* u2.y = u >> 16u; */
+ factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y));
+
+ return deref(u2).val;
+ }
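+
+ /* Illustrative example: unpack_uint_to_uvec2(0xABCD1234u) yields
+ * uvec2(0x1234u, 0xABCDu), the inverse of pack_uvec2_to_uint above.
+ */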
+
+ /**
+ * \brief Unpack a uint32 into two int16's.
+ *
+ * Specifically each 16-bit value is sign-extended to the full width of an
+ * int32 on return.
+ */
+ ir_rvalue *
+ unpack_uint_to_ivec2(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ if (!(op_mask & LOWER_PACK_USE_BFE)) {
+ return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
+ constant(16u)),
+ constant(16u));
+ }
+
+ ir_variable *i = factory.make_temp(glsl_type::int_type,
+ "tmp_unpack_uint_to_ivec2_i");
+ factory.emit(assign(i, u2i(uint_rval)));
+
+ /* ivec2 i2; */
+ ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type,
+ "tmp_unpack_uint_to_ivec2_i2");
+
+ factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)),
+ WRITEMASK_X));
+ factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)),
+ WRITEMASK_Y));
+
+ return deref(i2).val;
+ }
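+
+ /* Illustrative example: unpack_uint_to_ivec2(0x0000ffffu) yields
+ * ivec2(-1, 0); the 16-bit value 0xffff is sign-extended to the int32 -1
+ * by either the shift pair or the bitfield_extract path above.
+ */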
+
+ /**
+ * \brief Unpack a uint32 into four uint8's.
+ *
+ * Interpret the given uint32 as a uint8 4-tuple where the uint32's least
+ * significant bits specify the 4-tuple's first element. Return the uint8
+ * 4-tuple as a uvec4.
+ */
+ ir_rvalue*
+ unpack_uint_to_uvec4(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = UINT_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_uint_to_uvec4_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* uvec4 u4; */
+ ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
+ "tmp_unpack_uint_to_uvec4_u4");
+
+ /* u4.x = u & 0xffu; */
+ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
+
+ if (op_mask & LOWER_PACK_USE_BFE) {
+ /* u4.y = bitfield_extract(u, 8, 8); */
+ factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)),
+ WRITEMASK_Y));
+
+ /* u4.z = bitfield_extract(u, 16, 8); */
+ factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)),
+ WRITEMASK_Z));
+ } else {
+ /* u4.y = (u >> 8u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
+ constant(0xffu)), WRITEMASK_Y));
+
+ /* u4.z = (u >> 16u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
+ constant(0xffu)), WRITEMASK_Z));
+ }
+
+ /* u4.w = (u >> 24u) */
+ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
+
+ return deref(u4).val;
+ }
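+
+ /* Illustrative example: unpack_uint_to_uvec4(0x78563412u) yields
+ * uvec4(0x12u, 0x34u, 0x56u, 0x78u), the inverse of pack_uvec4_to_uint.
+ */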
+
+ /**
+ * \brief Unpack a uint32 into four int8's.
+ *
+ * Specifically each 8-bit value is sign-extended to the full width of an
+ * int32 on return.
+ */
+ ir_rvalue *
+ unpack_uint_to_ivec4(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ if (!(op_mask & LOWER_PACK_USE_BFE)) {
+ return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
+ constant(24u)),
+ constant(24u));
+ }
+
+ ir_variable *i = factory.make_temp(glsl_type::int_type,
+ "tmp_unpack_uint_to_ivec4_i");
+ factory.emit(assign(i, u2i(uint_rval)));
+
+ /* ivec4 i4; */
+ ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type,
+ "tmp_unpack_uint_to_ivec4_i4");
+
+ factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)),
+ WRITEMASK_X));
+ factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)),
+ WRITEMASK_Y));
+ factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)),
+ WRITEMASK_Z));
+ factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)),
+ WRITEMASK_W));
+
+ return deref(i4).val;
+ }
+
+ /**
+ * \brief Lower a packSnorm2x16 expression.
+ *
+ * \param vec2_rval is packSnorm2x16's input
+ * \return packSnorm2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_snorm_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packSnorm2x16(vec2 v)
+ * --------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 16-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec2_to_uint(
+ * uvec2(ivec2(
+ * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f))));
+ *
+ * It is necessary to first convert the vec2 to ivec2 rather than directly
+ * converting vec2 to uvec2 because the latter conversion is undefined.
+ * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to
+ * convert a negative floating point value to an uint".
+ */
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_rvalue *result = pack_uvec2_to_uint(
+ i2u(f2i(round_even(mul(clamp(vec2_rval,
+ constant(-1.0f),
+ constant(1.0f)),
+ constant(32767.0f))))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
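+
+ /* Illustrative example: lowering packSnorm2x16(vec2(-1.0, 0.5)) produces
+ * round(-32767.0) == -32767 == 0x8001 (as a uint16) and round(16383.5) ==
+ * 16384 == 0x4000, so the packed result is 0x40008001u.
+ */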
+
+ /**
+ * \brief Lower a packSnorm4x8 expression.
+ *
+ * \param vec4_rval is packSnorm4x8's input
+ * \return packSnorm4x8's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp uint packSnorm4x8(vec4 v)
+ * -------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 8-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec4_to_uint(
+ * uvec4(ivec4(
+ * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
+ *
+ * It is necessary to first convert the vec4 to ivec4 rather than directly
+ * converting vec4 to uvec4 because the latter conversion is undefined.
+ * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to
+ * convert a negative floating point value to an uint".
+ */
+ assert(vec4_rval->type == glsl_type::vec4_type);
+
+ ir_rvalue *result = pack_uvec4_to_uint(
+ i2u(f2i(round_even(mul(clamp(vec4_rval,
+ constant(-1.0f),
+ constant(1.0f)),
+ constant(127.0f))))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackSnorm2x16 expression.
+ *
+ * \param uint_rval is unpackSnorm2x16's input
+ * \return unpackSnorm2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_snorm_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp vec2 unpackSnorm2x16 (highp uint p)
+ * -----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into a pair of
+ * 16-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * two-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return clamp(
+ * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f,
+ * -1.0f, 1.0f);
+ *
+ * The above IR may appear unnecessarily complex, but the intermediate
+ * conversion to ivec2 and the bit shifts are necessary to correctly unpack
+ * negative floats.
+ *
+ * To see why, consider packing and then unpacking vec2(-1.0, 0.0).
+ * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we
+ * place that int16 into an int32, which results in the *positive* integer
+ * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather
+ * unimportant bit 16. We must now extend the int16's sign bit into bits
+ * 17-32, which is accomplished by left-shifting then right-shifting.
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result =
+ clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)),
+ constant(32767.0f)),
+ constant(-1.0f),
+ constant(1.0f));
+
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
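+
+ /* Illustrative example: lowering unpackSnorm2x16(0x40008001u) sign-extends
+ * the two halves to (-32767, 16384), divides by 32767.0, and clamps,
+ * giving approximately vec2(-1.0, 0.5).
+ */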
+
+ /**
+ * \brief Lower an unpackSnorm4x8 expression.
+ *
+ * \param uint_rval is unpackSnorm4x8's input
+ * \return unpackSnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackSnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return clamp(
+ * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,
+ * -1.0f, 1.0f);
+ *
+ * The above IR may appear unnecessarily complex, but the intermediate
+ * conversion to ivec4 and the bit shifts are necessary to correctly unpack
+ * negative floats.
+ *
+ * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,
+ * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we
+ * place that int8 into an int32, which results in the *positive* integer
+ * 0x000000ff. The int8's sign bit becomes, in the int32, the rather
+ * unimportant bit 8. We must now extend the int8's sign bit into bits
+ * 9-32, which is accomplished by left-shifting then right-shifting.
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result =
+ clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)),
+ constant(127.0f)),
+ constant(-1.0f),
+ constant(1.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower a packUnorm2x16 expression.
+ *
+ * \param vec2_rval is packUnorm2x16's input
+ * \return packUnorm2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_unorm_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packUnorm2x16 (vec2 v)
+ * ---------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 16-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec2_to_uint(uvec2(
+ * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f)));
+ *
+ * Here it is safe to directly convert the vec2 to uvec2 because the vec2
+ * has been clamped to a non-negative range.
+ */
+
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_rvalue *result = pack_uvec2_to_uint(
+ f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f)))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
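+
+ /* Illustrative example: lowering packUnorm2x16(vec2(0.0, 1.0)) produces
+ * uvec2(0u, 65535u), which packs to 0xffff0000u.
+ */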
+
+ /**
+ * \brief Lower a packUnorm4x8 expression.
+ *
+ * \param vec4_rval is packUnorm4x8's input
+ * \return packUnorm4x8's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp uint packUnorm4x8 (vec4 v)
+ * --------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 8-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec4_to_uint(uvec4(
+ * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f)));
+ *
+ * Here it is safe to directly convert the vec4 to uvec4 because the vec4
+ * has been clamped to a non-negative range.
+ */
+
+ assert(vec4_rval->type == glsl_type::vec4_type);
+
+ ir_rvalue *result = pack_uvec4_to_uint(
+ f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
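+
+ /* Illustrative example: lowering packUnorm4x8(vec4(0.0, 1.0, 0.5, 0.2))
+ * produces the bytes 0x00, 0xff, 0x80, 0x33 and the packed result
+ * 0x3380ff00u.
+ */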
+
+ /**
+ * \brief Lower an unpackUnorm2x16 expression.
+ *
+ * \param uint_rval is unpackUnorm2x16's input
+ * \return unpackUnorm2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp vec2 unpackUnorm2x16 (highp uint p)
+ * -----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into a pair of
+ * 16-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * two-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm2x16: f / 65535.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)),
+ constant(65535.0f));
+
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackUnorm4x8 expression.
+ *
+ * \param uint_rval is unpackUnorm4x8's input
+ * \return unpackUnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackUnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm4x8: f / 255.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
+ constant(255.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower the component-wise calculation of packHalf2x16.
+ *
+ * \param f_rval is one component of packHalf2x16's input
+ * \param e_rval is the unshifted exponent bits of f_rval
+ * \param m_rval is the unshifted mantissa bits of f_rval
+ *
+ * \return a uint rvalue that encodes a float16 in its lower 16 bits
+ */
+ ir_rvalue*
+ pack_half_1x16_nosign(ir_rvalue *f_rval,
+ ir_rvalue *e_rval,
+ ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u16; */
+ ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_u16");
+
+ /* float f = FLOAT_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::float_type,
+ "tmp_pack_half_1x16_f");
+ factory.emit(assign(f, f_rval));
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
+ *
+ * The minimum and maximum normal float16 values are
+ *
+ * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20)
+ * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21)
+ *
+ * The step at max_norm16 is
+ *
+ * max_step16 = 2^5 (22)
+ *
+ * Observe that the float16 boundary values in equations 20-21 lie in the
+ * range of normal float32 values.
+ *
+ *
+ * Rounding Behavior
+ * -----------------
+ * Not all float32 values can be exactly represented as a float16. We
+ * round all such intermediate float32 values to the nearest float16; if
+ * the float32 is exactly between two float16 values, we round to the one
+ * with an even mantissa. This rounding behavior has several benefits:
+ *
+ * - It has no sign bias.
+ *
+ * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
+ * GPU ISA.
+ *
+ * - By reproducing the behavior of the GPU (at least on Intel hardware),
+ * compile-time evaluation of constant packHalf2x16 GLSL expressions will
+ * result in the same value as if the expression were executed on the
+ * GPU.
+ *
+ * Calculation
+ * -----------
+ * Our task is to compute s16, e16, m16 given f32. Since this function
+ * ignores the sign bit, assume that s32 = s16 = 0. There are several
+ * cases to consider.
+ */
+
+ factory.emit(
+
+ /* Case 1) f32 is NaN
+ *
+ * The resultant f16 will also be NaN.
+ */
+
+ /* if (e32 == 255 && m32 != 0) { */
+ if_tree(logic_and(equal(e, constant(0xffu << 23u)),
+ logic_not(equal(m, constant(0u)))),
+
+ assign(u16, constant(0x7fffu)),
+
+ /* Case 2) f32 lies in the range [0, min_norm16).
+ *
+ * The resultant float16 will be either zero, subnormal, or normal.
+ *
+ * Solving
+ *
+ * f32 = min_norm16 (30)
+ *
+ * gives
+ *
+ * e32 = 113 and m32 = 0 (31)
+ *
+ * Therefore this case occurs if and only if
+ *
+ * e32 < 113 (32)
+ */
+
+ /* } else if (e32 < 113) { */
+ if_tree(less(e, constant(113u << 23u)),
+
+ /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */
+ assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f),
+ constant((float) (1 << 24)))))),
+
+ /* Case 3) f32 lies in the range
+ * [min_norm16, max_norm16 + max_step16).
+ *
+ * The resultant float16 will be either normal or infinite.
+ *
+ * Solving
+ *
+ * f32 = max_norm16 + max_step16 (40)
+ * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41)
+ * = 2^16 (42)
+ * gives
+ *
+ * e32 = 143 and m32 = 0 (43)
+ *
+ * We already solved the boundary condition f32 = min_norm16 above
+ * in equation 31. Therefore this case occurs if and only if
+ *
+ * 113 <= e32 and e32 < 143
+ */
+
+ /* } else if (e32 < 143) { */
+ if_tree(less(e, constant(143u << 23u)),
+
+ /* The addition below handles the case where the mantissa rounds
+ * up to 1024 and bumps the exponent.
+ *
+ * u16 = ((e - (112u << 23u)) >> 13u)
+ * + round_to_even(float(m) / float(1u << 13u));
+ */
+ assign(u16, add(rshift(sub(e, constant(112u << 23u)),
+ constant(13u)),
+ f2u(round_even(
+ div(u2f(m), constant((float) (1 << 13))))))),
+
+ /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf].
+ *
+ * The resultant float16 will be infinite.
+ *
+ * The cases above caught all float32 values in the range
+ * [0, max_norm16 + max_step16), so this is the fall-through case.
+ */
+
+ /* } else { */
+
+ assign(u16, constant(31u << 10u))))));
+
+ /* } */
+
+ return deref(u16).val;
+ }
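+
+ /* Illustrative example: for f32 == 1.0f (bits 0x3f800000u, so e ==
+ * 127u << 23u and m == 0u), case 3 applies and u16 becomes
+ * ((127u - 112u) << 10u) == 0x3c00u, the float16 encoding of 1.0.
+ */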
+
+ /**
+ * \brief Lower a packHalf2x16 expression.
+ *
+ * \param vec2_rval is packHalf2x16's input
+ * \return packHalf2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_half_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packHalf2x16 (mediump vec2 v)
+ * ----------------------------------------
+ * Returns an unsigned integer obtained by converting the components of
+ * a two-component floating-point vector to the 16-bit floating-point
+ * representation found in the OpenGL ES Specification, and then packing
+ * these two 16-bit integers into a 32-bit unsigned integer.
+ *
+ * The first vector component specifies the 16 least-significant bits
+ * of the result; the second component specifies the 16 most-significant
+ * bits.
+ */
+
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ /* vec2 f = VEC2_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::vec2_type,
+ "tmp_pack_half_2x16_f");
+ factory.emit(assign(f, vec2_rval));
+
+ /* uvec2 f32 = bitcast_f2u(f); */
+ ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_f32");
+ factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f)));
+
+ /* uvec2 f16; */
+ ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_f16");
+
+ /* Get f32's unshifted exponent bits.
+ *
+ * uvec2 e = f32 & 0x7f800000u;
+ */
+ ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_e");
+ factory.emit(assign(e, bit_and(f32, constant(0x7f800000u))));
+
+ /* Get f32's unshifted mantissa bits.
+ *
+ * uvec2 m = f32 & 0x007fffffu;
+ */
+ ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_m");
+ factory.emit(assign(m, bit_and(f32, constant(0x007fffffu))));
+
+ /* Set f16's exponent and mantissa bits.
+ *
+ * f16.x = pack_half_1x16_nosign(e.x, m.x);
+ * f16.y = pack_half_1x16_nosign(e.y, m.y);
+ */
+ factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f),
+ swizzle_x(e),
+ swizzle_x(m)),
+ WRITEMASK_X));
+ factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f),
+ swizzle_y(e),
+ swizzle_y(m)),
+ WRITEMASK_Y));
+
+ /* Set f16's sign bits.
+ *
+ * f16 |= (f32 & (1u << 31u)) >> 16u;
+ */
+ factory.emit(
+ assign(f16, bit_or(f16,
+ rshift(bit_and(f32, constant(1u << 31u)),
+ constant(16u)))));
+
+
+ /* return (f16.y << 16u) | f16.x; */
+ ir_rvalue *result = bit_or(lshift(swizzle_y(f16),
+ constant(16u)),
+ swizzle_x(f16));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
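+
+ /* Illustrative example: lowering packHalf2x16(vec2(1.0, -2.0)) yields
+ * f16 == uvec2(0x3c00u, 0xc000u) once the sign bit of the second component
+ * is merged in, so the packed result is 0xc0003c00u.
+ */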
+
-
- /**
- * \brief Split packHalf2x16's vec2 operand into two floats.
- *
- * \param vec2_rval is packHalf2x16's input
- * \return a uint rvalue
- *
- * Some code generators, such as the i965 fragment shader, require that all
- * vector expressions be lowered to a sequence of scalar expressions.
- * However, packHalf2x16 cannot be scalarized by the same mechanism as
- * a true vector operation because its input and output have a differing
- * number of vector components.
- *
- * This method scalarizes packHalf2x16 by transforming it from an unary
- * operation having vector input to a binary operation having scalar input.
- * That is, it transforms
- *
- * packHalf2x16(VEC2_RVAL);
- *
- * into
- *
- * vec2 v = VEC2_RVAL;
- * return packHalf2x16_split(v.x, v.y);
- */
- ir_rvalue*
- split_pack_half_2x16(ir_rvalue *vec2_rval)
- {
- assert(vec2_rval->type == glsl_type::vec2_type);
-
- ir_variable *v = factory.make_temp(glsl_type::vec2_type,
- "tmp_split_pack_half_2x16_v");
- factory.emit(assign(v, vec2_rval));
-
- return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v));
- }
-
- /**
- * \brief Split unpackHalf2x16 into two operations.
- *
- * \param uint_rval is unpackHalf2x16's input
- * \return a vec2 rvalue
- *
- * Some code generators, such as the i965 fragment shader, require that all
- * vector expressions be lowered to a sequence of scalar expressions.
- * However, unpackHalf2x16 cannot be scalarized by the same method as
- * a true vector operation because the number of components of its input
- * and output differ.
- *
- * This method scalarizes unpackHalf2x16 by transforming it from a single
- * operation having vec2 output to a pair of operations each having float
- * output. That is, it transforms
- *
- * unpackHalf2x16(UINT_RVAL)
- *
- * into
- *
- * uint u = UINT_RVAL;
- * vec2 v;
- *
- * v.x = unpackHalf2x16_split_x(u);
- * v.y = unpackHalf2x16_split_y(u);
- *
- * return v;
- */
- ir_rvalue*
- split_unpack_half_2x16(ir_rvalue *uint_rval)
- {
- assert(uint_rval->type == glsl_type::uint_type);
-
- /* uint u = uint_rval; */
- ir_variable *u = factory.make_temp(glsl_type::uint_type,
- "tmp_split_unpack_half_2x16_u");
- factory.emit(assign(u, uint_rval));
-
- /* vec2 v; */
- ir_variable *v = factory.make_temp(glsl_type::vec2_type,
- "tmp_split_unpack_half_2x16_v");
-
- /* v.x = unpack_half_2x16_split_x(u); */
- factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u),
- WRITEMASK_X));
-
- /* v.y = unpack_half_2x16_split_y(u); */
- factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u),
- WRITEMASK_Y));
-
- return deref(v).val;
- }
+ /**
+ * \brief Lower the component-wise calculation of unpackHalf2x16.
+ *
+ * Given a uint that encodes a float16 in its lower 16 bits, this function
+ * returns a uint that encodes a float32 with the same value. The sign bit
+ * of the float16 is ignored.
+ *
+ * \param e_rval is the unshifted exponent bits of a float16
+ * \param m_rval is the unshifted mantissa bits of a float16
+ * \return a uint rvalue that encodes a float32
+ */
+ ir_rvalue*
+ unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u32; */
+ ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_u32");
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
+ *
+ * Calculation
+ * -----------
+ * Our task is to compute s32, e32, m32 given f16. Since this function
+ * ignores the sign bit, assume that s32 = s16 = 0. There are several
+ * cases to consider.
+ */
+
+ factory.emit(
+
+ /* Case 1) f16 is zero or subnormal.
+ *
+ * The simplest method of calculating f32 in this case is
+ *
+ * f32 = f16 (20)
+ * = 2^(-14) * (m16 / 2^10) (21)
+ * = m16 / 2^24 (22)
+ */
+
+ /* if (e16 == 0) { */
+ if_tree(equal(e, constant(0u)),
+
+ /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
+ assign(u32, expr(ir_unop_bitcast_f2u,
+ div(u2f(m), constant((float)(1 << 24))))),
+
+ /* Case 2) f16 is normal.
+ *
+ * The equation
+ *
+ * f32 = f16 (30)
+ * 2^(e32 - 127) * (1 + m32 / 2^23) = (31)
+ * 2^(e16 - 15) * (1 + m16 / 2^10)
+ *
+ * can be decomposed into two
+ *
+ * 2^(e32 - 127) = 2^(e16 - 15) (32)
+ * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33)
+ *
+ * which solve to
+ *
+ * e32 = e16 + 112 (34)
+ * m32 = m16 * 2^13 (35)
+ */
+
+ /* } else if (e16 < 31) { */
+ if_tree(less(e, constant(31u << 10u)),
+
+ /* u32 = ((e + (112 << 10)) | m) << 13;
+ */
+ assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
+ constant(13u))),
+
+
+ /* Case 3) f16 is infinite. */
+ if_tree(equal(m, constant(0u)),
+
+ assign(u32, constant(255u << 23u)),
+
+ /* Case 4) f16 is NaN. */
+ /* } else { */
+
+ assign(u32, constant(0x7fffffffu))))));
+
+ /* } */
+
+ return deref(u32).val;
+ }
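+
+ /* Illustrative example: for e == 0x3c00u and m == 0u (the float16 1.0),
+ * case 2 applies and u32 becomes ((0x3c00u + (112u << 10u)) | 0u) << 13u
+ * == 0x3f800000u, the float32 bit pattern of 1.0f.
+ */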
+
+ /**
+ * \brief Lower an unpackHalf2x16 expression.
+ *
+ * \param uint_rval is unpackHalf2x16's input
+ * \return unpackHalf2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_half_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * mediump vec2 unpackHalf2x16 (highp uint v)
+ * ------------------------------------------
+ * Returns a two-component floating-point vector with components
+ * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit
+ * values, interpreting those values as 16-bit floating-point numbers
+ * according to the OpenGL ES Specification, and converting them to
+ * 32-bit floating-point values.
+ *
+ * The first component of the vector is obtained from the
+ * 16 least-significant bits of v; the second component is obtained
+ * from the 16 most-significant bits of v.
+ */
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = RVALUE;
+ * uvec2 f16 = uvec2(u & 0xffffu, u >> 16u);
+ */
+ ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_f16");
+ factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval)));
+
+ /* uvec2 f32; */
+ ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_f32");
+
+ /* Get f16's unshifted exponent bits.
+ *
+ * uvec2 e = f16 & 0x7c00u;
+ */
+ ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_e");
+ factory.emit(assign(e, bit_and(f16, constant(0x7c00u))));
+
+ /* Get f16's unshifted mantissa bits.
+ *
+ * uvec2 m = f16 & 0x03ffu;
+ */
+ ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_m");
+ factory.emit(assign(m, bit_and(f16, constant(0x03ffu))));
+
+ /* Set f32's exponent and mantissa bits.
+ *
+ * f32.x = unpack_half_1x16_nosign(e.x, m.x);
+ * f32.y = unpack_half_1x16_nosign(e.y, m.y);
+ */
+ factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e),
+ swizzle_x(m)),
+ WRITEMASK_X));
+ factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e),
+ swizzle_y(m)),
+ WRITEMASK_Y));
+
+ /* Set f32's sign bit.
+ *
+ * f32 |= (f16 & 0x8000u) << 16u;
+ */
+ factory.emit(assign(f32, bit_or(f32,
+ lshift(bit_and(f16,
+ constant(0x8000u)),
+ constant(16u)))));
+
+ /* return bitcast_u2f(f32); */
+ ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32);
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+ };
+
+ } // namespace anonymous
+
+ /**
+ * \brief Lower the builtin packing functions.
+ *
+ * \param op_mask is a bitmask of `enum lower_packing_builtins_op`.
+ */
+ bool
+ lower_packing_builtins(exec_list *instructions, int op_mask)
+ {
+ lower_packing_builtins_visitor v(op_mask);
+ visit_list_elements(&v, instructions, true);
+ return v.get_progress();
+ }
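+
+ /* Example call (hypothetical): a driver that needs only the snorm
+ * builtins lowered might run
+ *
+ * lower_packing_builtins(ir, LOWER_PACK_SNORM_2x16 | LOWER_UNPACK_SNORM_2x16);
+ *
+ * where `ir` is the shader's instruction list.
+ */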
--- /dev/null
+ /*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+ #include "standalone_scaffolding.h"
+
+ #include <assert.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include "util/ralloc.h"
+ #include "util/strtod.h"
+
++extern "C" void
++_mesa_error_no_memory(const char *caller)
++{
++ fprintf(stderr, "Mesa error: out of memory in %s", caller);
++}
++
+ void
+ _mesa_warning(struct gl_context *ctx, const char *fmt, ...)
+ {
+ va_list vargs;
+ (void) ctx;
+
+ va_start(vargs, fmt);
+
+ /* This output is not thread-safe, but that's good enough for the
+ * standalone compiler.
+ */
+ fprintf(stderr, "Mesa warning: ");
+ vfprintf(stderr, fmt, vargs);
+ fprintf(stderr, "\n");
+
+ va_end(vargs);
+ }
+
+ void
+ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+ struct gl_shader *sh)
+ {
+ (void) ctx;
+ *ptr = sh;
+ }
+
+ void
+ _mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
+ const char *)
+ {
+ }
+
+ struct gl_shader *
+ _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
+ {
+ struct gl_shader *shader;
+
+ (void) ctx;
+
+ assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER);
+ shader = rzalloc(NULL, struct gl_shader);
+ if (shader) {
+ shader->Type = type;
+ shader->Stage = _mesa_shader_enum_to_shader_stage(type);
+ shader->Name = name;
+ shader->RefCount = 1;
+ }
+ return shader;
+ }
+
+ void
+ _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
+ {
+ free((void *)sh->Source);
+ free(sh->Label);
+ ralloc_free(sh);
+ }
+
+ void
+ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
+ {
+ unsigned i;
+
+ shProg->NumUniformStorage = 0;
+ shProg->UniformStorage = NULL;
+ shProg->NumUniformRemapTable = 0;
+ shProg->UniformRemapTable = NULL;
+ shProg->UniformHash = NULL;
+
+ ralloc_free(shProg->InfoLog);
+ shProg->InfoLog = ralloc_strdup(shProg, "");
+
+ ralloc_free(shProg->BufferInterfaceBlocks);
+ shProg->BufferInterfaceBlocks = NULL;
+ shProg->NumBufferInterfaceBlocks = 0;
+
+ ralloc_free(shProg->UniformBlocks);
+ shProg->UniformBlocks = NULL;
+ shProg->NumUniformBlocks = 0;
+
+ ralloc_free(shProg->ShaderStorageBlocks);
+ shProg->ShaderStorageBlocks = NULL;
+ shProg->NumShaderStorageBlocks = 0;
+
+ for (i = 0; i < MESA_SHADER_STAGES; i++) {
+ ralloc_free(shProg->InterfaceBlockStageIndex[i]);
+ shProg->InterfaceBlockStageIndex[i] = NULL;
+ }
+
+ ralloc_free(shProg->UboInterfaceBlockIndex);
+ shProg->UboInterfaceBlockIndex = NULL;
+ ralloc_free(shProg->SsboInterfaceBlockIndex);
+ shProg->SsboInterfaceBlockIndex = NULL;
+
+ ralloc_free(shProg->AtomicBuffers);
+ shProg->AtomicBuffers = NULL;
+ shProg->NumAtomicBuffers = 0;
+ }
+
+ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
+ {
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->API = api;
+
+ ctx->Extensions.dummy_false = false;
+ ctx->Extensions.dummy_true = true;
+ ctx->Extensions.ARB_compute_shader = true;
+ ctx->Extensions.ARB_conservative_depth = true;
+ ctx->Extensions.ARB_draw_instanced = true;
+ ctx->Extensions.ARB_ES2_compatibility = true;
+ ctx->Extensions.ARB_ES3_compatibility = true;
+ ctx->Extensions.ARB_explicit_attrib_location = true;
+ ctx->Extensions.ARB_fragment_coord_conventions = true;
+ ctx->Extensions.ARB_fragment_layer_viewport = true;
+ ctx->Extensions.ARB_gpu_shader5 = true;
+ ctx->Extensions.ARB_gpu_shader_fp64 = true;
+ ctx->Extensions.ARB_sample_shading = true;
+ ctx->Extensions.ARB_shader_bit_encoding = true;
+ ctx->Extensions.ARB_shader_draw_parameters = true;
+ ctx->Extensions.ARB_shader_stencil_export = true;
+ ctx->Extensions.ARB_shader_subroutine = true;
+ ctx->Extensions.ARB_shader_texture_lod = true;
+ ctx->Extensions.ARB_shading_language_420pack = true;
+ ctx->Extensions.ARB_shading_language_packing = true;
+ ctx->Extensions.ARB_tessellation_shader = true;
+ ctx->Extensions.ARB_texture_cube_map_array = true;
+ ctx->Extensions.ARB_texture_gather = true;
+ ctx->Extensions.ARB_texture_multisample = true;
+ ctx->Extensions.ARB_texture_query_levels = true;
+ ctx->Extensions.ARB_texture_query_lod = true;
+ ctx->Extensions.ARB_uniform_buffer_object = true;
+ ctx->Extensions.ARB_viewport_array = true;
+
+ ctx->Extensions.OES_EGL_image_external = true;
+ ctx->Extensions.OES_standard_derivatives = true;
+
+ ctx->Extensions.EXT_shader_integer_mix = true;
+ ctx->Extensions.EXT_texture_array = true;
+
+ ctx->Extensions.NV_texture_rectangle = true;
+
+ ctx->Const.GLSLVersion = 120;
+
+ /* 1.20 minimums. */
+ ctx->Const.MaxLights = 8;
+ ctx->Const.MaxClipPlanes = 6;
+ ctx->Const.MaxTextureUnits = 2;
+ ctx->Const.MaxTextureCoordUnits = 2;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16;
+
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32;
+ ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0;
+ ctx->Const.MaxCombinedTextureImageUnits = 2;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 2;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32;
+
+ ctx->Const.MaxDrawBuffers = 1;
+ ctx->Const.MaxComputeWorkGroupCount[0] = 65535;
+ ctx->Const.MaxComputeWorkGroupCount[1] = 65535;
+ ctx->Const.MaxComputeWorkGroupCount[2] = 65535;
+ ctx->Const.MaxComputeWorkGroupSize[0] = 1024;
+ ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
+ ctx->Const.MaxComputeWorkGroupSize[2] = 64;
+ ctx->Const.MaxComputeWorkGroupInvocations = 1024;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
+
+ /* Set up default shader compiler options. */
+ struct gl_shader_compiler_options options;
+ memset(&options, 0, sizeof(options));
+ options.MaxUnrollIterations = 32;
+ options.MaxIfDepth = UINT_MAX;
+
+ for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh)
+ memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options));
+
+ _mesa_locale_init();
+ }
--- /dev/null
+ /*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include <stdio.h>
+ #include "main/macros.h"
+ #include "compiler/glsl/glsl_parser_extras.h"
+ #include "glsl_types.h"
+ #include "util/hash_table.h"
+
+
+ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
+ hash_table *glsl_type::array_types = NULL;
+ hash_table *glsl_type::record_types = NULL;
+ hash_table *glsl_type::interface_types = NULL;
++hash_table *glsl_type::function_types = NULL;
+ hash_table *glsl_type::subroutine_types = NULL;
+ void *glsl_type::mem_ctx = NULL;
+
+ void
+ glsl_type::init_ralloc_type_ctx(void)
+ {
+ if (glsl_type::mem_ctx == NULL) {
+ glsl_type::mem_ctx = ralloc_autofree_context();
+ assert(glsl_type::mem_ctx != NULL);
+ }
+ }
+
+ glsl_type::glsl_type(GLenum gl_type,
+ glsl_base_type base_type, unsigned vector_elements,
+ unsigned matrix_columns, const char *name) :
+ gl_type(gl_type),
+ base_type(base_type),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(vector_elements), matrix_columns(matrix_columns),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ /* Neither dimension is zero or both dimensions are zero.
+ */
+ assert((vector_elements == 0) == (matrix_columns == 0));
+ memset(& fields, 0, sizeof(fields));
+ }
+
+ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
+ enum glsl_sampler_dim dim, bool shadow, bool array,
+ unsigned type, const char *name) :
+ gl_type(gl_type),
+ base_type(base_type),
+ sampler_dimensionality(dim), sampler_shadow(shadow),
+ sampler_array(array), sampler_type(type), interface_packing(0),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ memset(& fields, 0, sizeof(fields));
+
+ if (base_type == GLSL_TYPE_SAMPLER) {
+ /* Samplers take no storage whatsoever. */
+ matrix_columns = vector_elements = 0;
+ } else {
+ matrix_columns = vector_elements = 1;
+ }
+ }
+
+ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ const char *name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_STRUCT),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(num_fields)
+ {
+ unsigned int i;
+
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+ this->fields.structure = ralloc_array(this->mem_ctx,
+ glsl_struct_field, length);
+
+ for (i = 0; i < length; i++) {
+ this->fields.structure[i].type = fields[i].type;
+ this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
+ fields[i].name);
+ this->fields.structure[i].location = fields[i].location;
+ this->fields.structure[i].interpolation = fields[i].interpolation;
+ this->fields.structure[i].centroid = fields[i].centroid;
+ this->fields.structure[i].sample = fields[i].sample;
+ this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
+ this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
+ this->fields.structure[i].precision = fields[i].precision;
+ }
+
+ mtx_unlock(&glsl_type::mutex);
+ }
+
+ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ enum glsl_interface_packing packing, const char *name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_INTERFACE),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing((unsigned) packing),
+ vector_elements(0), matrix_columns(0),
+ length(num_fields)
+ {
+ unsigned int i;
+
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, name);
+ this->fields.structure = ralloc_array(this->mem_ctx,
+ glsl_struct_field, length);
+ for (i = 0; i < length; i++) {
+ this->fields.structure[i].type = fields[i].type;
+ this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
+ fields[i].name);
+ this->fields.structure[i].location = fields[i].location;
+ this->fields.structure[i].interpolation = fields[i].interpolation;
+ this->fields.structure[i].centroid = fields[i].centroid;
+ this->fields.structure[i].sample = fields[i].sample;
+ this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
+ this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].precision = fields[i].precision;
+ }
+
+ mtx_unlock(&glsl_type::mutex);
+ }
+
++glsl_type::glsl_type(const glsl_type *return_type,
++ const glsl_function_param *params, unsigned num_params) :
++ gl_type(0),
++ base_type(GLSL_TYPE_FUNCTION),
++ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
++ sampler_type(0), interface_packing(0),
++ vector_elements(0), matrix_columns(0),
++ length(num_params)
++{
++ unsigned int i;
++
++ mtx_lock(&glsl_type::mutex);
++
++ init_ralloc_type_ctx();
++
++ this->fields.parameters = rzalloc_array(this->mem_ctx,
++ glsl_function_param, num_params + 1);
++
++ /* We store the return type as the first parameter */
++ this->fields.parameters[0].type = return_type;
++ this->fields.parameters[0].in = false;
++ this->fields.parameters[0].out = true;
++
++ /* We store the i'th parameter in slot i+1 */
++ for (i = 0; i < length; i++) {
++ this->fields.parameters[i + 1].type = params[i].type;
++ this->fields.parameters[i + 1].in = params[i].in;
++ this->fields.parameters[i + 1].out = params[i].out;
++ }
++
++ mtx_unlock(&glsl_type::mutex);
++}
++
+ glsl_type::glsl_type(const char *subroutine_name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_SUBROUTINE),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(1), matrix_columns(1),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(subroutine_name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
+ mtx_unlock(&glsl_type::mutex);
+ }
+
+ bool
+ glsl_type::contains_sampler() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_sampler();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_sampler())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_sampler();
+ }
+ }
+
+
+ bool
+ glsl_type::contains_integer() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_integer();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_integer())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_integer();
+ }
+ }
+
+ bool
+ glsl_type::contains_double() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_double();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_double())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_double();
+ }
+ }
+
+ bool
+ glsl_type::contains_opaque() const {
+ switch (base_type) {
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ return true;
+ case GLSL_TYPE_ARRAY:
+ return fields.array->contains_opaque();
+ case GLSL_TYPE_STRUCT:
+ for (unsigned int i = 0; i < length; i++) {
+ if (fields.structure[i].type->contains_opaque())
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+
+ bool
+ glsl_type::contains_subroutine() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_subroutine();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_subroutine())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_subroutine();
+ }
+ }
+
+ gl_texture_index
+ glsl_type::sampler_index() const
+ {
+ const glsl_type *const t = (this->is_array()) ? this->fields.array : this;
+
+ assert(t->is_sampler());
+
+ switch (t->sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ return (t->sampler_array) ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+ case GLSL_SAMPLER_DIM_2D:
+ return (t->sampler_array) ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+ case GLSL_SAMPLER_DIM_3D:
+ return TEXTURE_3D_INDEX;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return (t->sampler_array) ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
+ case GLSL_SAMPLER_DIM_RECT:
+ return TEXTURE_RECT_INDEX;
+ case GLSL_SAMPLER_DIM_BUF:
+ return TEXTURE_BUFFER_INDEX;
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return TEXTURE_EXTERNAL_INDEX;
+ case GLSL_SAMPLER_DIM_MS:
+ return (t->sampler_array) ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
+ default:
+ assert(!"Should not get here.");
+ return TEXTURE_BUFFER_INDEX;
+ }
+ }
+
+ bool
+ glsl_type::contains_image() const
+ {
+ if (this->is_array()) {
+ return this->fields.array->contains_image();
+ } else if (this->is_record()) {
+ for (unsigned int i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type->contains_image())
+ return true;
+ }
+ return false;
+ } else {
+ return this->is_image();
+ }
+ }
+
+ const glsl_type *glsl_type::get_base_type() const
+ {
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ return uint_type;
+ case GLSL_TYPE_INT:
+ return int_type;
+ case GLSL_TYPE_FLOAT:
+ return float_type;
+ case GLSL_TYPE_DOUBLE:
+ return double_type;
+ case GLSL_TYPE_BOOL:
+ return bool_type;
+ default:
+ return error_type;
+ }
+ }
+
+
+ const glsl_type *glsl_type::get_scalar_type() const
+ {
+ const glsl_type *type = this;
+
+ /* Handle arrays */
+ while (type->base_type == GLSL_TYPE_ARRAY)
+ type = type->fields.array;
+
+ /* Handle vectors and matrices */
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ return uint_type;
+ case GLSL_TYPE_INT:
+ return int_type;
+ case GLSL_TYPE_FLOAT:
+ return float_type;
+ case GLSL_TYPE_DOUBLE:
+ return double_type;
+ case GLSL_TYPE_BOOL:
+ return bool_type;
+ default:
+ /* Handle everything else */
+ return type;
+ }
+ }
+
+
+ void
+ _mesa_glsl_release_types(void)
+ {
+ /* Should only be called during atexit (either when unloading shared
+ * object, or if process terminates), so no mutex-locking should be
+ * necessary.
+ */
+ if (glsl_type::array_types != NULL) {
+ _mesa_hash_table_destroy(glsl_type::array_types, NULL);
+ glsl_type::array_types = NULL;
+ }
+
+ if (glsl_type::record_types != NULL) {
+ _mesa_hash_table_destroy(glsl_type::record_types, NULL);
+ glsl_type::record_types = NULL;
+ }
+
+ if (glsl_type::interface_types != NULL) {
+ _mesa_hash_table_destroy(glsl_type::interface_types, NULL);
+ glsl_type::interface_types = NULL;
+ }
+ }
+
+
+ glsl_type::glsl_type(const glsl_type *array, unsigned length) :
+ base_type(GLSL_TYPE_ARRAY),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(length), name(NULL)
+ {
+ this->fields.array = array;
+ /* Inherit the gl type of the base. The GL type is used for
+ * uniform/statevar handling in Mesa and the arrayness of the type
+ * is represented by the size rather than the type.
+ */
+ this->gl_type = array->gl_type;
+
+ /* Allow a maximum of 10 characters for the array size. This is enough
+ * for 32-bits of ~0. The extra 3 are for the '[', ']', and terminating
+ * NUL.
+ */
+ const unsigned name_length = strlen(array->name) + 10 + 3;
+
+ mtx_lock(&glsl_type::mutex);
+ char *const n = (char *) ralloc_size(this->mem_ctx, name_length);
+ mtx_unlock(&glsl_type::mutex);
+
+ if (length == 0)
+ snprintf(n, name_length, "%s[]", array->name);
+ else {
+ /* Insert the outermost dimension in the correct spot;
+ * otherwise the dimension order will be backwards.
+ */
+ const char *pos = strchr(array->name, '[');
+ if (pos) {
+ int idx = pos - array->name;
+ snprintf(n, idx+1, "%s", array->name);
+ snprintf(n + idx, name_length - idx, "[%u]%s",
+ length, array->name + idx);
+ } else {
+ snprintf(n, name_length, "%s[%u]", array->name, length);
+ }
+ }
+
+ this->name = n;
+ }
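+
+ /* For example, wrapping the type named "float" in an array of length 3
+ * produces the name "float[3]"; wrapping that result again with length 2
+ * splices the new dimension in before the existing one, giving
+ * "float[2][3]", so the outermost dimension always appears first.
+ */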
+
+
+ const glsl_type *
+ glsl_type::vec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ float_type, vec2_type, vec3_type, vec4_type
+ };
+ return ts[components - 1];
+ }
+
+ const glsl_type *
+ glsl_type::dvec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ double_type, dvec2_type, dvec3_type, dvec4_type
+ };
+ return ts[components - 1];
+ }
+
+ const glsl_type *
+ glsl_type::ivec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ int_type, ivec2_type, ivec3_type, ivec4_type
+ };
+ return ts[components - 1];
+ }
+
+
+ const glsl_type *
+ glsl_type::uvec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ uint_type, uvec2_type, uvec3_type, uvec4_type
+ };
+ return ts[components - 1];
+ }
+
+
+ const glsl_type *
+ glsl_type::bvec(unsigned components)
+ {
+ if (components == 0 || components > 4)
+ return error_type;
+
+ static const glsl_type *const ts[] = {
+ bool_type, bvec2_type, bvec3_type, bvec4_type
+ };
+ return ts[components - 1];
+ }
+
+
+ const glsl_type *
+ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
+ {
+ if (base_type == GLSL_TYPE_VOID)
+ return void_type;
+
+ if ((rows < 1) || (rows > 4) || (columns < 1) || (columns > 4))
+ return error_type;
+
+ /* Treat GLSL vectors as Nx1 matrices.
+ */
+ if (columns == 1) {
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ return uvec(rows);
+ case GLSL_TYPE_INT:
+ return ivec(rows);
+ case GLSL_TYPE_FLOAT:
+ return vec(rows);
+ case GLSL_TYPE_DOUBLE:
+ return dvec(rows);
+ case GLSL_TYPE_BOOL:
+ return bvec(rows);
+ default:
+ return error_type;
+ }
+ } else {
+ if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
+ return error_type;
+
+ /* GLSL matrix types are named mat{COLUMNS}x{ROWS}. Only the following
+ * combinations are valid:
+ *
+ * 1 2 3 4
+ * 1
+ * 2 x x x
+ * 3 x x x
+ * 4 x x x
+ */
+ #define IDX(c,r) (((c-1)*3) + (r-1))
+
+ if (base_type == GLSL_TYPE_DOUBLE) {
+ switch (IDX(columns, rows)) {
+ case IDX(2,2): return dmat2_type;
+ case IDX(2,3): return dmat2x3_type;
+ case IDX(2,4): return dmat2x4_type;
+ case IDX(3,2): return dmat3x2_type;
+ case IDX(3,3): return dmat3_type;
+ case IDX(3,4): return dmat3x4_type;
+ case IDX(4,2): return dmat4x2_type;
+ case IDX(4,3): return dmat4x3_type;
+ case IDX(4,4): return dmat4_type;
+ default: return error_type;
+ }
+ } else {
+ switch (IDX(columns, rows)) {
+ case IDX(2,2): return mat2_type;
+ case IDX(2,3): return mat2x3_type;
+ case IDX(2,4): return mat2x4_type;
+ case IDX(3,2): return mat3x2_type;
+ case IDX(3,3): return mat3_type;
+ case IDX(3,4): return mat3x4_type;
+ case IDX(4,2): return mat4x2_type;
+ case IDX(4,3): return mat4x3_type;
+ case IDX(4,4): return mat4_type;
+ default: return error_type;
+ }
+ }
+ }
+
+ assert(!"Should not get here.");
+ return error_type;
+ }
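+
+ /* For example:
+ *
+ * get_instance(GLSL_TYPE_FLOAT, 3, 1) yields vec3_type,
+ * get_instance(GLSL_TYPE_FLOAT, 3, 4) yields mat4x3_type (4 columns,
+ * 3 rows), and
+ * get_instance(GLSL_TYPE_INT, 2, 2) yields error_type, since GLSL has
+ * no integer matrices.
+ */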
+
+ const glsl_type *
+ glsl_type::get_sampler_instance(enum glsl_sampler_dim dim,
+ bool shadow,
+ bool array,
+ glsl_base_type type)
+ {
+ switch (type) {
+ case GLSL_TYPE_FLOAT:
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ if (shadow)
+ return (array ? sampler1DArrayShadow_type : sampler1DShadow_type);
+ else
+ return (array ? sampler1DArray_type : sampler1D_type);
+ case GLSL_SAMPLER_DIM_2D:
+ if (shadow)
+ return (array ? sampler2DArrayShadow_type : sampler2DShadow_type);
+ else
+ return (array ? sampler2DArray_type : sampler2D_type);
+ case GLSL_SAMPLER_DIM_3D:
+ if (shadow || array)
+ return error_type;
+ else
+ return sampler3D_type;
+ case GLSL_SAMPLER_DIM_CUBE:
+ if (shadow)
+ return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type);
+ else
+ return (array ? samplerCubeArray_type : samplerCube_type);
+ case GLSL_SAMPLER_DIM_RECT:
+ if (array)
+ return error_type;
+ if (shadow)
+ return sampler2DRectShadow_type;
+ else
+ return sampler2DRect_type;
+ case GLSL_SAMPLER_DIM_BUF:
+ if (shadow || array)
+ return error_type;
+ else
+ return samplerBuffer_type;
+ case GLSL_SAMPLER_DIM_MS:
+ if (shadow)
+ return error_type;
+ return (array ? sampler2DMSArray_type : sampler2DMS_type);
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ if (shadow || array)
+ return error_type;
+ else
+ return samplerExternalOES_type;
+ }
+ case GLSL_TYPE_INT:
+ if (shadow)
+ return error_type;
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ return (array ? isampler1DArray_type : isampler1D_type);
+ case GLSL_SAMPLER_DIM_2D:
+ return (array ? isampler2DArray_type : isampler2D_type);
+ case GLSL_SAMPLER_DIM_3D:
+ if (array)
+ return error_type;
+ return isampler3D_type;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return (array ? isamplerCubeArray_type : isamplerCube_type);
+ case GLSL_SAMPLER_DIM_RECT:
+ if (array)
+ return error_type;
+ return isampler2DRect_type;
+ case GLSL_SAMPLER_DIM_BUF:
+ if (array)
+ return error_type;
+ return isamplerBuffer_type;
+ case GLSL_SAMPLER_DIM_MS:
+ return (array ? isampler2DMSArray_type : isampler2DMS_type);
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return error_type;
+ }
+ case GLSL_TYPE_UINT:
+ if (shadow)
+ return error_type;
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ return (array ? usampler1DArray_type : usampler1D_type);
+ case GLSL_SAMPLER_DIM_2D:
+ return (array ? usampler2DArray_type : usampler2D_type);
+ case GLSL_SAMPLER_DIM_3D:
+ if (array)
+ return error_type;
+ return usampler3D_type;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return (array ? usamplerCubeArray_type : usamplerCube_type);
+ case GLSL_SAMPLER_DIM_RECT:
+ if (array)
+ return error_type;
+ return usampler2DRect_type;
+ case GLSL_SAMPLER_DIM_BUF:
+ if (array)
+ return error_type;
+ return usamplerBuffer_type;
+ case GLSL_SAMPLER_DIM_MS:
+ return (array ? usampler2DMSArray_type : usampler2DMS_type);
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return error_type;
+ }
+ default:
+ return error_type;
+ }
+
+ unreachable("switch statement above should be complete");
+ }
+
++const glsl_type *
++glsl_type::get_image_instance(enum glsl_sampler_dim dim,
++ bool array, glsl_base_type type)
++{
++ switch (type) {
++ case GLSL_TYPE_FLOAT:
++ switch (dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ return (array ? image1DArray_type : image1D_type);
++ case GLSL_SAMPLER_DIM_2D:
++ return (array ? image2DArray_type : image2D_type);
++ case GLSL_SAMPLER_DIM_3D:
++ return image3D_type;
++ case GLSL_SAMPLER_DIM_CUBE:
++ return (array ? imageCubeArray_type : imageCube_type);
++ case GLSL_SAMPLER_DIM_RECT:
++ if (array)
++ return error_type;
++ else
++ return image2DRect_type;
++ case GLSL_SAMPLER_DIM_BUF:
++ if (array)
++ return error_type;
++ else
++ return imageBuffer_type;
++ case GLSL_SAMPLER_DIM_MS:
++ return (array ? image2DMSArray_type : image2DMS_type);
++ case GLSL_SAMPLER_DIM_EXTERNAL:
++ return error_type;
++ }
++ case GLSL_TYPE_INT:
++ switch (dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ return (array ? iimage1DArray_type : iimage1D_type);
++ case GLSL_SAMPLER_DIM_2D:
++ return (array ? iimage2DArray_type : iimage2D_type);
++ case GLSL_SAMPLER_DIM_3D:
++ if (array)
++ return error_type;
++ return iimage3D_type;
++ case GLSL_SAMPLER_DIM_CUBE:
++ return (array ? iimageCubeArray_type : iimageCube_type);
++ case GLSL_SAMPLER_DIM_RECT:
++ if (array)
++ return error_type;
++ return iimage2DRect_type;
++ case GLSL_SAMPLER_DIM_BUF:
++ if (array)
++ return error_type;
++ return iimageBuffer_type;
++ case GLSL_SAMPLER_DIM_MS:
++ return (array ? iimage2DMSArray_type : iimage2DMS_type);
++ case GLSL_SAMPLER_DIM_EXTERNAL:
++ return error_type;
++ }
++ case GLSL_TYPE_UINT:
++ switch (dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ return (array ? uimage1DArray_type : uimage1D_type);
++ case GLSL_SAMPLER_DIM_2D:
++ return (array ? uimage2DArray_type : uimage2D_type);
++ case GLSL_SAMPLER_DIM_3D:
++ if (array)
++ return error_type;
++ return uimage3D_type;
++ case GLSL_SAMPLER_DIM_CUBE:
++ return (array ? uimageCubeArray_type : uimageCube_type);
++ case GLSL_SAMPLER_DIM_RECT:
++ if (array)
++ return error_type;
++ return uimage2DRect_type;
++ case GLSL_SAMPLER_DIM_BUF:
++ if (array)
++ return error_type;
++ return uimageBuffer_type;
++ case GLSL_SAMPLER_DIM_MS:
++ return (array ? uimage2DMSArray_type : uimage2DMS_type);
++ case GLSL_SAMPLER_DIM_EXTERNAL:
++ return error_type;
++ }
++ default:
++ return error_type;
++ }
++
++ unreachable("switch statement above should be complete");
++}
++
+ const glsl_type *
+ glsl_type::get_array_instance(const glsl_type *base, unsigned array_size)
+ {
+ /* Generate a name using the base type pointer in the key. This is
+ * done because the name of the base type may not be unique across
+ * shaders. For example, two shaders may have different record types
+ * named 'foo'.
+ */
+ char key[128];
+ snprintf(key, sizeof(key), "%p[%u]", (void *) base, array_size);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (array_types == NULL) {
+ array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+ _mesa_key_string_equal);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(array_types, key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(base, array_size);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(array_types,
+ ralloc_strdup(mem_ctx, key),
+ (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY);
+ assert(((glsl_type *) entry->data)->length == array_size);
+ assert(((glsl_type *) entry->data)->fields.array == base);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+
+ bool
+ glsl_type::record_compare(const glsl_type *b) const
+ {
+ if (this->length != b->length)
+ return false;
+
+ if (this->interface_packing != b->interface_packing)
+ return false;
+
+ /* From the GLSL 4.20 specification (Sec 4.2):
+ *
+ * "Structures must have the same name, sequence of type names, and
+ * type definitions, and field names to be considered the same type."
+ *
+ * GLSL ES behaves the same (Ver 1.00 Sec 4.2.4, Ver 3.00 Sec 4.2.5).
+ *
+ * Note that we cannot enforce the type-name check when comparing unnamed
+ * structure types; these are assigned a unique name during parsing.
+ */
+ if (!this->is_anonymous() && !b->is_anonymous())
+ if (strcmp(this->name, b->name) != 0)
+ return false;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ if (this->fields.structure[i].type != b->fields.structure[i].type)
+ return false;
+ if (strcmp(this->fields.structure[i].name,
+ b->fields.structure[i].name) != 0)
+ return false;
+ if (this->fields.structure[i].matrix_layout
+ != b->fields.structure[i].matrix_layout)
+ return false;
+ if (this->fields.structure[i].location
+ != b->fields.structure[i].location)
+ return false;
+ if (this->fields.structure[i].interpolation
+ != b->fields.structure[i].interpolation)
+ return false;
+ if (this->fields.structure[i].centroid
+ != b->fields.structure[i].centroid)
+ return false;
+ if (this->fields.structure[i].sample
+ != b->fields.structure[i].sample)
+ return false;
+ if (this->fields.structure[i].patch
+ != b->fields.structure[i].patch)
+ return false;
+ if (this->fields.structure[i].image_read_only
+ != b->fields.structure[i].image_read_only)
+ return false;
+ if (this->fields.structure[i].image_write_only
+ != b->fields.structure[i].image_write_only)
+ return false;
+ if (this->fields.structure[i].image_coherent
+ != b->fields.structure[i].image_coherent)
+ return false;
+ if (this->fields.structure[i].image_volatile
+ != b->fields.structure[i].image_volatile)
+ return false;
+ if (this->fields.structure[i].image_restrict
+ != b->fields.structure[i].image_restrict)
+ return false;
+ if (this->fields.structure[i].precision
+ != b->fields.structure[i].precision)
+ return false;
+ }
+
+ return true;
+ }
+
+
+ bool
+ glsl_type::record_key_compare(const void *a, const void *b)
+ {
+ const glsl_type *const key1 = (glsl_type *) a;
+ const glsl_type *const key2 = (glsl_type *) b;
+
+ return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2);
+ }
+
+
+ /**
+ * Generate an integer hash value for a glsl_type structure type.
+ */
+ unsigned
+ glsl_type::record_key_hash(const void *a)
+ {
+ const glsl_type *const key = (glsl_type *) a;
+ uintptr_t hash = key->length;
+ unsigned retval;
+
+ for (unsigned i = 0; i < key->length; i++) {
+ /* casting pointer to uintptr_t */
+ hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type;
+ }
+
+ if (sizeof(hash) == 8)
+ retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32);
+ else
+ retval = hash;
+
+ return retval;
+ }
+
+
+ const glsl_type *
+ glsl_type::get_record_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ const char *name)
+ {
+ const glsl_type key(fields, num_fields, name);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (record_types == NULL) {
+ record_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(record_types,
+ &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(fields, num_fields, name);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(record_types, t, (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT);
+ assert(((glsl_type *) entry->data)->length == num_fields);
+ assert(strcmp(((glsl_type *) entry->data)->name, name) == 0);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+
+ const glsl_type *
+ glsl_type::get_interface_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *block_name)
+ {
+ const glsl_type key(fields, num_fields, packing, block_name);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (interface_types == NULL) {
+ interface_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(interface_types,
+ &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(fields, num_fields,
+ packing, block_name);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(interface_types, t, (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE);
+ assert(((glsl_type *) entry->data)->length == num_fields);
+ assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+ const glsl_type *
+ glsl_type::get_subroutine_instance(const char *subroutine_name)
+ {
+ const glsl_type key(subroutine_name);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (subroutine_types == NULL) {
+ subroutine_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+ }
+
+ const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types,
+ &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(subroutine_name);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t);
+ }
+
+ assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE);
+ assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return (glsl_type *) entry->data;
+ }
+
+
++static bool
++function_key_compare(const void *a, const void *b)
++{
++ const glsl_type *const key1 = (glsl_type *) a;
++ const glsl_type *const key2 = (glsl_type *) b;
++
++ if (key1->length != key2->length)
++ return false;
++
++ return memcmp(key1->fields.parameters, key2->fields.parameters,
++ (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
++}
++
++
++static uint32_t
++function_key_hash(const void *a)
++{
++ const glsl_type *const key = (glsl_type *) a;
++ char hash_key[128];
++ unsigned size = 0;
++
++ size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
++
++ for (unsigned i = 0; i < key->length; i++) {
++ if (size >= sizeof(hash_key))
++ break;
++
++ size += snprintf(& hash_key[size], sizeof(hash_key) - size,
++ "%p", (void *) key->fields.structure[i].type);
++ }
++
++ return _mesa_hash_string(hash_key);
++}
++
++const glsl_type *
++glsl_type::get_function_instance(const glsl_type *return_type,
++ const glsl_function_param *params,
++ unsigned num_params)
++{
++ const glsl_type key(return_type, params, num_params);
++
++ mtx_lock(&glsl_type::mutex);
++
++ if (function_types == NULL) {
++ function_types = _mesa_hash_table_create(NULL, function_key_hash,
++ function_key_compare);
++ }
++
++ struct hash_entry *entry = _mesa_hash_table_search(function_types, &key);
++ if (entry == NULL) {
++ mtx_unlock(&glsl_type::mutex);
++ const glsl_type *t = new glsl_type(return_type, params, num_params);
++ mtx_lock(&glsl_type::mutex);
++
++ entry = _mesa_hash_table_insert(function_types, t, (void *) t);
++ }
++
++ const glsl_type *t = (const glsl_type *)entry->data;
++
++ assert(t->base_type == GLSL_TYPE_FUNCTION);
++ assert(t->length == num_params);
++
++ mtx_unlock(&glsl_type::mutex);
++
++ return t;
++}
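++
++ /* Sketch of the intended usage (illustrative; it assumes glsl_function_param
++ * carries the parameter type in its 'type' member, as the hashing code above
++ * does):
++ *
++ * glsl_function_param params[2] = {};
++ * params[0].type = glsl_type::float_type;
++ * params[1].type = glsl_type::int_type;
++ * const glsl_type *fn =
++ * glsl_type::get_function_instance(glsl_type::void_type, params, 2);
++ *
++ * The zero-initialization matters because function_key_compare() memcmp()s
++ * the whole parameter array. Repeated calls with an identical signature
++ * return the same pointer, so function types compare by pointer like the
++ * other interned types.
++ */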
++
++
+ const glsl_type *
+ glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
+ {
+ if (type_a == type_b) {
+ return type_a;
+ } else if (type_a->is_matrix() && type_b->is_matrix()) {
+ /* Matrix multiply. The columns of A must match the rows of B. Given
+ * the other previously tested constraints, this means the vector type
+ * of a row from A must be the same as the vector type of a column from
+ * B.
+ */
+ if (type_a->row_type() == type_b->column_type()) {
+ /* The resulting matrix has the number of columns of matrix B and
+ * the number of rows of matrix A. We get the row count of A by
+ * looking at the size of a vector that makes up a column. The
+ * transpose (size of a row) is done for B.
+ */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_a->column_type()->vector_elements,
+ type_b->row_type()->vector_elements);
+ assert(type != error_type);
+
+ return type;
+ }
+ } else if (type_a->is_matrix()) {
+ /* A is a matrix and B is a column vector. Columns of A must match
+ * rows of B. Given the other previously tested constraints, this
+ * means the vector type of a row from A must be the same as the
+ * vector type of B.
+ */
+ if (type_a->row_type() == type_b) {
+ /* The resulting vector has a number of elements equal to
+ * the number of rows of matrix A. */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_a->column_type()->vector_elements,
+ 1);
+ assert(type != error_type);
+
+ return type;
+ }
+ } else {
+ assert(type_b->is_matrix());
+
+ /* A is a row vector and B is a matrix. Columns of A must match rows
+ * of B. Given the other previously tested constraints, this means
+ * the type of A must be the same as the vector type of a column from
+ * B.
+ */
+ if (type_a == type_b->column_type()) {
+ /* The resulting vector has a number of elements equal to
+ * the number of columns of matrix B. */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_b->row_type()->vector_elements,
+ 1);
+ assert(type != error_type);
+
+ return type;
+ }
+ }
+
+ return error_type;
+ }
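+
+ /* Worked examples of the rules above:
+ *
+ * mat2x3 * mat4x2 yields mat4x3 (a row of A and a column of B are both
+ * vec2; the result takes B's column count and A's row count),
+ * mat4 * vec4 yields vec4,
+ * vec3 * mat3 yields vec3, and
+ * mat3 * vec2 yields error_type because the dimensions do not match.
+ */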
+
+
+ const glsl_type *
+ glsl_type::field_type(const char *name) const
+ {
+ if (this->base_type != GLSL_TYPE_STRUCT
+ && this->base_type != GLSL_TYPE_INTERFACE)
+ return error_type;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ if (strcmp(name, this->fields.structure[i].name) == 0)
+ return this->fields.structure[i].type;
+ }
+
+ return error_type;
+ }
+
+
+ int
+ glsl_type::field_index(const char *name) const
+ {
+ if (this->base_type != GLSL_TYPE_STRUCT
+ && this->base_type != GLSL_TYPE_INTERFACE)
+ return -1;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ if (strcmp(name, this->fields.structure[i].name) == 0)
+ return i;
+ }
+
+ return -1;
+ }
+
+
+ unsigned
+ glsl_type::component_slots() const
+ {
+ switch (this->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ return this->components();
+
+ case GLSL_TYPE_DOUBLE:
+ return 2 * this->components();
+
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE: {
+ unsigned size = 0;
+
+ for (unsigned i = 0; i < this->length; i++)
+ size += this->fields.structure[i].type->component_slots();
+
+ return size;
+ }
+
+ case GLSL_TYPE_ARRAY:
+ return this->length * this->fields.array->component_slots();
+
+ case GLSL_TYPE_IMAGE:
+ return 1;
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
++
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ break;
+ }
+
+ return 0;
+ }
+
+ unsigned
+ glsl_type::record_location_offset(unsigned length) const
+ {
+ unsigned offset = 0;
+ const glsl_type *t = this->without_array();
+ if (t->is_record()) {
+ assert(length <= t->length);
+
+ for (unsigned i = 0; i < length; i++) {
+ const glsl_type *st = t->fields.structure[i].type;
+ const glsl_type *wa = st->without_array();
+ if (wa->is_record()) {
+ unsigned r_offset = wa->record_location_offset(wa->length);
+ offset += st->is_array() ?
+ st->arrays_of_arrays_size() * r_offset : r_offset;
+ } else if (st->is_array() && st->fields.array->is_array()) {
+ unsigned outer_array_size = st->length;
+ const glsl_type *base_type = st->fields.array;
+
+ /* For arrays of arrays the outer arrays take up a uniform
+ * slot for each element. The innermost array elements share a
+ * single slot so we ignore the innermost array when calculating
+ * the offset.
+ */
+ while (base_type->fields.array->is_array()) {
+ outer_array_size = outer_array_size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ offset += outer_array_size;
+ } else {
+ /* We don't worry about arrays here because unless the array
+ * contains a structure or another array it only takes up a single
+ * uniform slot.
+ */
+ offset += 1;
+ }
+ }
+ }
+ return offset;
+ }
+
+ unsigned
+ glsl_type::uniform_locations() const
+ {
+ unsigned size = 0;
+
+ switch (this->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
+
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
+ for (unsigned i = 0; i < this->length; i++)
+ size += this->fields.structure[i].type->uniform_locations();
+ return size;
+ case GLSL_TYPE_ARRAY:
+ return this->length * this->fields.array->uniform_locations();
+ default:
+ return 0;
+ }
+ }
+
+ bool
+ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
+ _mesa_glsl_parse_state *state) const
+ {
+ if (this == desired)
+ return true;
+
+ /* There is no conversion among matrix types. */
+ if (this->matrix_columns > 1 || desired->matrix_columns > 1)
+ return false;
+
+ /* Vector size must match. */
+ if (this->vector_elements != desired->vector_elements)
+ return false;
+
+ /* int and uint can be converted to float. */
+ if (desired->is_float() && this->is_integer())
+ return true;
+
+ /* With GLSL 4.0 / ARB_gpu_shader5, int can be converted to uint.
+ * Note that state may be NULL here, when resolving function calls in the
+ * linker. By this time, all the state-dependent checks have already
+ * happened though, so allow anything that's allowed in any shader version. */
+ if ((!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) &&
+ desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT)
+ return true;
+
+ /* No implicit conversions from double. */
+ if ((!state || state->has_double()) && this->is_double())
+ return false;
+
+ /* Conversions from different types to double. */
+ if ((!state || state->has_double()) && desired->is_double()) {
+ if (this->is_float())
+ return true;
+ if (this->is_integer())
+ return true;
+ }
+
+ return false;
+ }
+
+ unsigned
+ glsl_type::std140_base_alignment(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ switch (this->vector_elements) {
+ case 1:
+ return N;
+ case 2:
+ return 2 * N;
+ case 3:
+ case 4:
+ return 4 * N;
+ }
+ }
+
+ /* (4) If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single
+ * array element, according to rules (1), (2), and (3), and rounded up
+ * to the base alignment of a vec4. The array may have padding at the
+ * end; the base offset of the member following the array is rounded up
+ * to the next multiple of the base alignment.
+ *
+ * (6) If the member is an array of <S> column-major matrices with <C>
+ * columns and <R> rows, the matrix is stored identically to a row of
+ * <S>*<C> column vectors with <R> components each, according to rule
+ * (4).
+ *
+ * (8) If the member is an array of <S> row-major matrices with <C> columns
+ * and <R> rows, the matrix is stored identically to a row of <S>*<R>
+ * row vectors with <C> components each, according to rule (4).
+ *
+ * (10) If the member is an array of <S> structures, the <S> elements of
+ * the array are laid out in order, according to rule (9).
+ */
+ if (this->is_array()) {
+ if (this->fields.array->is_scalar() ||
+ this->fields.array->is_vector() ||
+ this->fields.array->is_matrix()) {
+ return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+ } else {
+ assert(this->fields.array->is_record() ||
+ this->fields.array->is_array());
+ return this->fields.array->std140_base_alignment(row_major);
+ }
+ }
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->is_matrix()) {
+ const struct glsl_type *vec_type, *array_type;
+ int c = this->matrix_columns;
+ int r = this->vector_elements;
+
+ if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+ } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+ }
+
+ return array_type->std140_base_alignment(false);
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record()) {
+ unsigned base_alignment = 16;
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std140_base_alignment(field_row_major));
+ }
+ return base_alignment;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
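+
+ /* A few concrete values that follow from these rules: float aligns to 4,
+ * vec2 to 8, vec3 and vec4 to 16; float[4] aligns to 16 because rule (4)
+ * rounds the element alignment up to that of a vec4; mat3 aligns to 16
+ * (it is laid out as an array of three vec3 columns); and
+ * struct { float f; vec3 v; } aligns to 16, the largest member alignment
+ * rounded up to a vec4.
+ */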
+
+ unsigned
+ glsl_type::std140_size(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ return this->vector_elements * N;
+ }
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (6) If the member is an array of <S> column-major matrices with <C>
+ * columns and <R> rows, the matrix is stored identically to a row of
+ * <S>*<C> column vectors with <R> components each, according to rule
+ * (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ *
+ * (8) If the member is an array of <S> row-major matrices with <C> columns
+ * and <R> rows, the matrix is stored identically to a row of <S>*<R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->without_array()->is_matrix()) {
+ const struct glsl_type *element_type;
+ const struct glsl_type *vec_type;
+ unsigned int array_len;
+
+ if (this->is_array()) {
+ element_type = this->without_array();
+ array_len = this->arrays_of_arrays_size();
+ } else {
+ element_type = this;
+ array_len = 1;
+ }
+
+ if (row_major) {
+ vec_type = get_instance(element_type->base_type,
+ element_type->matrix_columns, 1);
+
+ array_len *= element_type->vector_elements;
+ } else {
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
+ }
+ const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+ array_len);
+
+ return array_type->std140_size(false);
+ }
+
+ /* (4) If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single
+ * array element, according to rules (1), (2), and (3), and rounded up
+ * to the base alignment of a vec4. The array may have padding at the
+ * end; the base offset of the member following the array is rounded up
+ * to the next multiple of the base alignment.
+ *
+ * (10) If the member is an array of <S> structures, the <S> elements of
+ * the array are laid out in order, according to rule (9).
+ */
+ if (this->is_array()) {
+ if (this->without_array()->is_record()) {
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std140_size(row_major);
+ } else {
+ unsigned element_base_align =
+ this->without_array()->std140_base_alignment(row_major);
+ return this->arrays_of_arrays_size() * MAX2(element_base_align, 16);
+ }
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record() || this->is_interface()) {
+ unsigned size = 0;
+ unsigned max_align = 0;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std140_base_alignment(field_row_major);
+
+ /* Ignore unsized arrays when calculating size */
+ if (field_type->is_unsized_array())
+ continue;
+
+ size = glsl_align(size, align);
+ size += field_type->std140_size(field_row_major);
+
+ max_align = MAX2(align, max_align);
+
+ if (field_type->is_record() && (i + 1 < this->length))
+ size = glsl_align(size, 16);
+ }
+ size = glsl_align(size, MAX2(max_align, 16));
+ return size;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
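+
+ /* Example: struct { float f; vec3 v; } occupies 32 bytes under std140.
+ * 'f' sits at offset 0 (4 bytes), 'v' is aligned up to offset 16 and adds
+ * 12 bytes, and the resulting 28 bytes are rounded up to the structure
+ * alignment of 16, giving 32. A float[3], by contrast, takes 3 * 16 = 48
+ * bytes because each array element is padded out to a vec4.
+ */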
+
+ unsigned
+ glsl_type::std430_base_alignment(bool row_major) const
+ {
+
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ switch (this->vector_elements) {
+ case 1:
+ return N;
+ case 2:
+ return 2 * N;
+ case 3:
+ case 4:
+ return 4 * N;
+ }
+ }
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+ */
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_array())
+ return this->fields.array->std430_base_alignment(row_major);
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->is_matrix()) {
+ const struct glsl_type *vec_type, *array_type;
+ int c = this->matrix_columns;
+ int r = this->vector_elements;
+
+ if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+ } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+ }
+
+ return array_type->std430_base_alignment(false);
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record()) {
+ unsigned base_alignment = 0;
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std430_base_alignment(field_row_major));
+ }
+ assert(base_alignment > 0);
+ return base_alignment;
+ }
+ assert(!"not reached");
+ return -1;
+ }
+
+ unsigned
+ glsl_type::std430_array_stride(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+ * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_vector() && this->vector_elements == 3)
+ return 4 * N;
+
+ /* By default use std430_size(row_major) */
+ return this->std430_size(row_major);
+ }
+
+ unsigned
+ glsl_type::std430_size(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+ */
+ if (this->is_scalar() || this->is_vector())
+ return this->vector_elements * N;
+
+ if (this->without_array()->is_matrix()) {
+ const struct glsl_type *element_type;
+ const struct glsl_type *vec_type;
+ unsigned int array_len;
+
+ if (this->is_array()) {
+ element_type = this->without_array();
+ array_len = this->arrays_of_arrays_size();
+ } else {
+ element_type = this;
+ array_len = 1;
+ }
+
+ if (row_major) {
+ vec_type = get_instance(element_type->base_type,
+ element_type->matrix_columns, 1);
+
+ array_len *= element_type->vector_elements;
+ } else {
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
+ }
+ const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+ array_len);
+
+ return array_type->std430_size(false);
+ }
+
+ if (this->is_array()) {
+ if (this->without_array()->is_record())
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std430_size(row_major);
+ else
+ return this->arrays_of_arrays_size() *
+ this->without_array()->std430_base_alignment(row_major);
+ }
+
+ if (this->is_record() || this->is_interface()) {
+ unsigned size = 0;
+ unsigned max_align = 0;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std430_base_alignment(field_row_major);
+ size = glsl_align(size, align);
+ size += field_type->std430_size(field_row_major);
+
+ max_align = MAX2(align, max_align);
+ }
+ size = glsl_align(size, max_align);
+ return size;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
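+
+ /* The practical difference from std140 shows up for arrays of scalars and
+ * vectors: under std430 a float[3] occupies 3 * 4 = 12 bytes (no vec4
+ * rounding), whereas std140 needs 3 * 16 = 48 bytes. Structures are still
+ * padded to their own base alignment, so struct { float f; vec3 v; }
+ * remains 32 bytes in both layouts.
+ */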
+
+ unsigned
+ glsl_type::count_attribute_slots(bool vertex_input_slots) const
+ {
+ /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "A scalar input counts the same amount against this limit as a vec4,
+ * so applications may want to consider packing groups of four
+ * unrelated float inputs together into a vector to better utilize the
+ * capabilities of the underlying hardware. A matrix input will use up
+ * multiple locations. The number of locations used will equal the
+ * number of columns in the matrix."
+ *
+ * The spec does not explicitly say how arrays are counted. However, it
+ * should be safe to assume the total number of slots consumed by an array
+ * is the number of entries in the array multiplied by the number of slots
+ * consumed by a single element of the array.
+ *
+ * The spec says nothing about how structs are counted, because vertex
+ * attributes are not allowed to be (or contain) structs. However, Mesa
+ * allows varying structs; the number of varying slots taken up by a
+ * varying struct is simply equal to the sum of the number of slots taken
+ * up by each element.
+ *
+ * Doubles are counted differently depending on whether they are vertex
+ * inputs or everything else. Vertex inputs from ARB_vertex_attrib_64bit
+ * take one location no matter what size they are, otherwise dvec3/4
+ * take two locations.
+ */
+ switch (this->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ return this->matrix_columns;
+ case GLSL_TYPE_DOUBLE:
+ if (this->vector_elements > 2 && !vertex_input_slots)
+ return this->matrix_columns * 2;
+ else
+ return this->matrix_columns;
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE: {
+ unsigned size = 0;
+
+ for (unsigned i = 0; i < this->length; i++)
+ size += this->fields.structure[i].type->count_attribute_slots(vertex_input_slots);
+
+ return size;
+ }
+
+ case GLSL_TYPE_ARRAY:
+ return this->length * this->fields.array->count_attribute_slots(vertex_input_slots);
+
++ case GLSL_TYPE_FUNCTION:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SUBROUTINE:
+ case GLSL_TYPE_ERROR:
+ break;
+ }
+
+ assert(!"Unexpected type in count_attribute_slots()");
+
+ return 0;
+ }
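+
+ /* For example, a float or vec4 input uses 1 slot and a mat4 uses 4 (one
+ * per column). A dvec4 uses 1 slot as a vertex input but 2 slots as a
+ * varying, so a dmat3x4 varying uses 3 * 2 = 6 slots.
+ */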
+
+ int
+ glsl_type::coordinate_components() const
+ {
+ int size;
+
+ switch (sampler_dimensionality) {
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_BUF:
+ size = 1;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_MS:
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ size = 2;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ size = 3;
+ break;
+ default:
+ assert(!"Should not get here.");
+ size = 1;
+ break;
+ }
+
+ /* Array textures need an additional component for the array index, except
+ * for cubemap array images that behave like a 2D array of interleaved
+ * cubemap faces.
+ */
+ if (sampler_array &&
+ !(base_type == GLSL_TYPE_IMAGE &&
+ sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE))
+ size += 1;
+
+ return size;
+ }
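+
+ /* For example, sampler2D needs 2 coordinate components, sampler2DArray
+ * needs 3 (the extra component selects the layer), samplerCubeArray needs
+ * 4, while a cubemap array image stays at 3 as noted above.
+ */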
+
+ /**
+ * Declarations of type flyweights (glsl_type::_foo_type) and
+ * convenience pointers (glsl_type::foo_type).
+ * @{
+ */
+ #define DECL_TYPE(NAME, ...) \
+ const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \
+ const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type;
+
+ #define STRUCT_TYPE(NAME)
+
+ #include "compiler/builtin_type_macros.h"
+ /** @} */
--- /dev/null
- const struct glsl_type *parameters; /**< Parameters to function. */
+ /* -*- c++ -*- */
+ /*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #pragma once
+ #ifndef GLSL_TYPES_H
+ #define GLSL_TYPES_H
+
+ #include <string.h>
+ #include <assert.h>
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ struct _mesa_glsl_parse_state;
+ struct glsl_symbol_table;
+
+ extern void
+ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state);
+
+ extern void
+ _mesa_glsl_release_types(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ enum glsl_base_type {
+ GLSL_TYPE_UINT = 0,
+ GLSL_TYPE_INT,
+ GLSL_TYPE_FLOAT,
+ GLSL_TYPE_DOUBLE,
+ GLSL_TYPE_BOOL,
+ GLSL_TYPE_SAMPLER,
+ GLSL_TYPE_IMAGE,
+ GLSL_TYPE_ATOMIC_UINT,
+ GLSL_TYPE_STRUCT,
++ GLSL_TYPE_FUNCTION,
+ GLSL_TYPE_INTERFACE,
+ GLSL_TYPE_ARRAY,
+ GLSL_TYPE_VOID,
+ GLSL_TYPE_SUBROUTINE,
+ GLSL_TYPE_ERROR
+ };
+
+ enum glsl_sampler_dim {
+ GLSL_SAMPLER_DIM_1D = 0,
+ GLSL_SAMPLER_DIM_2D,
+ GLSL_SAMPLER_DIM_3D,
+ GLSL_SAMPLER_DIM_CUBE,
+ GLSL_SAMPLER_DIM_RECT,
+ GLSL_SAMPLER_DIM_BUF,
+ GLSL_SAMPLER_DIM_EXTERNAL,
+ GLSL_SAMPLER_DIM_MS
+ };
+
+ enum glsl_interface_packing {
+ GLSL_INTERFACE_PACKING_STD140,
+ GLSL_INTERFACE_PACKING_SHARED,
+ GLSL_INTERFACE_PACKING_PACKED,
+ GLSL_INTERFACE_PACKING_STD430
+ };
+
+ enum glsl_matrix_layout {
+ /**
+ * The layout of the matrix is inherited from the object containing the
+ * matrix (the top level structure or the uniform block).
+ */
+ GLSL_MATRIX_LAYOUT_INHERITED,
+
+ /**
+ * Explicit column-major layout
+ *
+ * If a uniform block doesn't have an explicit layout set, it will default
+ * to this layout.
+ */
+ GLSL_MATRIX_LAYOUT_COLUMN_MAJOR,
+
+ /**
+ * Row-major layout
+ */
+ GLSL_MATRIX_LAYOUT_ROW_MAJOR
+ };
+
+ enum {
+ GLSL_PRECISION_NONE = 0,
+ GLSL_PRECISION_HIGH,
+ GLSL_PRECISION_MEDIUM,
+ GLSL_PRECISION_LOW
+ };
+
+ #ifdef __cplusplus
+ #include "GL/gl.h"
+ #include "util/ralloc.h"
+ #include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
+
+ struct glsl_type {
+ GLenum gl_type;
+ glsl_base_type base_type;
+
+ unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
+ unsigned sampler_shadow:1;
+ unsigned sampler_array:1;
+ unsigned sampler_type:2; /**< Type of data returned using this
+ * sampler or image. Only \c
+ * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
+ * and \c GLSL_TYPE_UINT are valid.
+ */
+ unsigned interface_packing:2;
+
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */
+ static void* operator new(size_t size)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ /* mem_ctx should have been created by the static members */
+ assert(glsl_type::mem_ctx != NULL);
+
+ void *type;
+
+ type = ralloc_size(glsl_type::mem_ctx, size);
+ assert(type != NULL);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return type;
+ }
+
+ /* If the user *does* call delete, that's OK, we will just
+ * ralloc_free in that case. */
+ static void operator delete(void *type)
+ {
+ mtx_lock(&glsl_type::mutex);
+ ralloc_free(type);
+ mtx_unlock(&glsl_type::mutex);
+ }
+
+ /**
+ * \name Vector and matrix element counts
+ *
+ * For scalars, each of these values will be 1. For non-numeric types
+ * these will be 0.
+ */
+ /*@{*/
+ uint8_t vector_elements; /**< 1, 2, 3, or 4 vector elements. */
+ uint8_t matrix_columns; /**< 1, 2, 3, or 4 matrix columns. */
+ /*@}*/
+
+ /**
+ * For \c GLSL_TYPE_ARRAY, this is the length of the array. For
+ * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of
+ * elements in the structure and the number of values pointed to by
+ * \c fields.structure (below).
+ */
+ unsigned length;
+
+ /**
+ * Name of the data type
+ *
+ * Will never be \c NULL.
+ */
+ const char *name;
+
+ /**
+ * Subtype of composite data types.
+ */
+ union {
+ const struct glsl_type *array; /**< Type of array elements. */
-#undef DECL_TYPE
-#undef STRUCT_TYPE
-#endif /* __cplusplus */
-
++ struct glsl_function_param *parameters; /**< Parameters to function. */
+ struct glsl_struct_field *structure; /**< List of struct fields. */
+ } fields;
+
+ /**
+ * \name Pointers to various public type singletons
+ */
+ /*@{*/
+ #undef DECL_TYPE
+ #define DECL_TYPE(NAME, ...) \
+ static const glsl_type *const NAME##_type;
+ #undef STRUCT_TYPE
+ #define STRUCT_TYPE(NAME) \
+ static const glsl_type *const struct_##NAME##_type;
+ #include "compiler/builtin_type_macros.h"
+ /*@}*/
+
+ /**
+ * Convenience accessors for vector types (shorter than get_instance()).
+ * @{
+ */
+ static const glsl_type *vec(unsigned components);
+ static const glsl_type *dvec(unsigned components);
+ static const glsl_type *ivec(unsigned components);
+ static const glsl_type *uvec(unsigned components);
+ static const glsl_type *bvec(unsigned components);
+ /**@}*/
+
+ /**
+ * For numeric and boolean derived types returns the basic scalar type
+ *
+ * If the type is a numeric or boolean scalar, vector, or matrix type,
+ * this function gets the scalar type of the individual components. For
+ * all other types, including arrays of numeric or boolean types, the
+ * error type is returned.
+ */
+ const glsl_type *get_base_type() const;
+
+ /**
+ * Get the basic scalar type which this type aggregates.
+ *
+ * If the type is a numeric or boolean scalar, vector, or matrix, or an
+ * array of any of those, this function gets the scalar type of the
+ * individual components. For structs and arrays of structs, this function
+ * returns the struct type. For samplers and arrays of samplers, this
+ * function returns the sampler type.
+ */
+ const glsl_type *get_scalar_type() const;
+
+ /**
+ * Get the instance of a built-in scalar, vector, or matrix type
+ */
+ static const glsl_type *get_instance(unsigned base_type, unsigned rows,
+ unsigned columns);
+
+ /**
+ * Get the instance of a sampler type
+ */
+ static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim,
+ bool shadow,
+ bool array,
+ glsl_base_type type);
+
++ static const glsl_type *get_image_instance(enum glsl_sampler_dim dim,
++ bool array, glsl_base_type type);
+
+ /**
+ * Get the instance of an array type
+ */
+ static const glsl_type *get_array_instance(const glsl_type *base,
+ unsigned elements);
+
+ /**
+ * Get the instance of a record type
+ */
+ static const glsl_type *get_record_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ const char *name);
+
+ /**
+ * Get the instance of an interface block type
+ */
+ static const glsl_type *get_interface_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *block_name);
+
+ /**
+ * Get the instance of a subroutine type
+ */
+ static const glsl_type *get_subroutine_instance(const char *subroutine_name);
+
++ /**
++ * Get the instance of a function type
++ */
++ static const glsl_type *get_function_instance(const struct glsl_type *return_type,
++ const glsl_function_param *parameters,
++ unsigned num_params);
++
+ /**
+ * Get the type resulting from a multiplication of \p type_a * \p type_b
+ */
+ static const glsl_type *get_mul_type(const glsl_type *type_a,
+ const glsl_type *type_b);
+
+ /**
+ * Query the total number of scalars that make up a scalar, vector or matrix
+ */
+ unsigned components() const
+ {
+ return vector_elements * matrix_columns;
+ }
+
+ /**
+ * Calculate the number of components slots required to hold this type
+ *
+ * This is used to determine how many uniform or varying locations a type
+ * might occupy.
+ */
+ unsigned component_slots() const;
+
+ /**
+ * Calculate offset between the base location of the struct in
+ * uniform storage and a struct member.
+ * For the initial call, length is the index of the member to find the
+ * offset for.
+ */
+ unsigned record_location_offset(unsigned length) const;
+
+ /**
+ * Calculate the number of unique values from glGetUniformLocation for the
+ * elements of the type.
+ *
+ * This is used to allocate slots in the UniformRemapTable; the number of
+ * locations may not match the storage space actually used by the driver.
+ */
+ unsigned uniform_locations() const;
+
+ /**
+ * Calculate the number of attribute slots required to hold this type
+ *
+ * This implements the language rules of GLSL 1.50 for counting the number
+ * of slots used by a vertex attribute. It also determines the number of
+ * varying slots the type will use up in the absence of varying packing
+ * (and thus, it can be used to measure the number of varying slots used by
+ * the varyings that are generated by lower_packed_varyings).
+ *
+ * For vertex shader attributes - doubles only take one slot.
+ * For inter-shader varyings - dvec3/dvec4 take two slots.
+ */
+ unsigned count_attribute_slots(bool vertex_input_slots) const;
+
+ /**
+ * Alignment in bytes of the start of this type in a std140 uniform
+ * block.
+ */
+ unsigned std140_base_alignment(bool row_major) const;
+
+ /** Size in bytes of this type in a std140 uniform block.
+ *
+ * Note that this is not GL_UNIFORM_SIZE (which is the number of
+ * elements in the array)
+ */
+ unsigned std140_size(bool row_major) const;
+
+ /**
+ * Alignment in bytes of the start of this type in a std430 shader
+ * storage block.
+ */
+ unsigned std430_base_alignment(bool row_major) const;
+
+ /**
+ * Calculate array stride in bytes of this type in a std430 shader storage
+ * block.
+ */
+ unsigned std430_array_stride(bool row_major) const;
+
+ /**
+ * Size in bytes of this type in a std430 shader storage block.
+ *
+ * Note that this is not GL_BUFFER_SIZE
+ */
+ unsigned std430_size(bool row_major) const;
+
+ /**
+ * \brief Can this type be implicitly converted to another?
+ *
+ * \return True if the types are identical or if this type can be converted
+ * to \c desired according to Section 4.1.10 of the GLSL spec.
+ *
+ * \verbatim
+ * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10
+ * Implicit Conversions:
+ *
+ * In some situations, an expression and its type will be implicitly
+ * converted to a different type. The following table shows all allowed
+ * implicit conversions:
+ *
+ * Type of expression | Can be implicitly converted to
+ * --------------------------------------------------
+ * int float
+ * uint
+ *
+ * ivec2 vec2
+ * uvec2
+ *
+ * ivec3 vec3
+ * uvec3
+ *
+ * ivec4 vec4
+ * uvec4
+ *
+ * There are no implicit array or structure conversions. For example,
+ * an array of int cannot be implicitly converted to an array of float.
+ * There are no implicit conversions between signed and unsigned
+ * integers.
+ * \endverbatim
+ */
+ bool can_implicitly_convert_to(const glsl_type *desired,
+ _mesa_glsl_parse_state *state) const;
+
+ /**
+ * Query whether or not a type is a scalar (non-vector and non-matrix).
+ */
+ bool is_scalar() const
+ {
+ return (vector_elements == 1)
+ && (base_type >= GLSL_TYPE_UINT)
+ && (base_type <= GLSL_TYPE_BOOL);
+ }
+
+ /**
+ * Query whether or not a type is a vector
+ */
+ bool is_vector() const
+ {
+ return (vector_elements > 1)
+ && (matrix_columns == 1)
+ && (base_type >= GLSL_TYPE_UINT)
+ && (base_type <= GLSL_TYPE_BOOL);
+ }
+
+ /**
+ * Query whether or not a type is a matrix
+ */
+ bool is_matrix() const
+ {
+ /* GLSL only has float and double matrices. */
+ return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE);
+ }
+
+ /**
+ * Query whether or not a type is a non-array numeric type
+ */
+ bool is_numeric() const
+ {
+ return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE);
+ }
+
+ /**
+ * Query whether or not a type is an integral type
+ */
+ bool is_integer() const
+ {
+ return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT);
+ }
+
+ /**
+ * Query whether or not type is an integral type, or for struct and array
+ * types, contains an integral type.
+ */
+ bool contains_integer() const;
+
+ /**
+ * Query whether or not type is a double type, or for struct and array
+ * types, contains a double type.
+ */
+ bool contains_double() const;
+
+ /**
+ * Query whether or not a type is a float type
+ */
+ bool is_float() const
+ {
+ return base_type == GLSL_TYPE_FLOAT;
+ }
+
+ /**
+ * Query whether or not a type is a double type
+ */
+ bool is_double() const
+ {
+ return base_type == GLSL_TYPE_DOUBLE;
+ }
+
+ /**
+ * Query whether a double takes two slots.
+ */
+ bool is_dual_slot_double() const
+ {
+ return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2;
+ }
+
+ /**
+ * Query whether or not a type is a non-array boolean type
+ */
+ bool is_boolean() const
+ {
+ return base_type == GLSL_TYPE_BOOL;
+ }
+
+ /**
+ * Query whether or not a type is a sampler
+ */
+ bool is_sampler() const
+ {
+ return base_type == GLSL_TYPE_SAMPLER;
+ }
+
+ /**
+ * Query whether or not type is a sampler, or for struct and array
+ * types, contains a sampler.
+ */
+ bool contains_sampler() const;
+
+ /**
+ * Get the Mesa texture target index for a sampler type.
+ */
+ gl_texture_index sampler_index() const;
+
+ /**
+ * Query whether or not type is an image, or for struct and array
+ * types, contains an image.
+ */
+ bool contains_image() const;
+
+ /**
+ * Query whether or not a type is an image
+ */
+ bool is_image() const
+ {
+ return base_type == GLSL_TYPE_IMAGE;
+ }
+
+ /**
+ * Query whether or not a type is an array
+ */
+ bool is_array() const
+ {
+ return base_type == GLSL_TYPE_ARRAY;
+ }
+
+ bool is_array_of_arrays() const
+ {
+ return is_array() && fields.array->is_array();
+ }
+
+ /**
+ * Query whether or not a type is a record
+ */
+ bool is_record() const
+ {
+ return base_type == GLSL_TYPE_STRUCT;
+ }
+
+ /**
+ * Query whether or not a type is an interface
+ */
+ bool is_interface() const
+ {
+ return base_type == GLSL_TYPE_INTERFACE;
+ }
+
+ /**
+ * Query whether or not a type is the void type singleton.
+ */
+ bool is_void() const
+ {
+ return base_type == GLSL_TYPE_VOID;
+ }
+
+ /**
+ * Query whether or not a type is the error type singleton.
+ */
+ bool is_error() const
+ {
+ return base_type == GLSL_TYPE_ERROR;
+ }
+
+ /**
+ * Query whether or not a type is a subroutine
+ */
+ bool is_subroutine() const
+ {
+ return base_type == GLSL_TYPE_SUBROUTINE;
+ }
+ bool contains_subroutine() const;
+
+ /**
+ * Query if a type is unnamed/anonymous (named by the parser)
+ */
+ bool is_anonymous() const
+ {
+ return !strncmp(name, "#anon", 5);
+ }
+
+ /**
+ * Get the type stripped of any arrays
+ *
+ * \return
+ * Pointer to the type of elements of the first non-array type for array
+ * types, or pointer to itself for non-array types.
+ */
+ const glsl_type *without_array() const
+ {
+ const glsl_type *t = this;
+
+ while (t->is_array())
+ t = t->fields.array;
+
+ return t;
+ }
+
+ /**
+ * Return the total number of elements in an array including the elements
+ * in arrays of arrays.
+ */
+ unsigned arrays_of_arrays_size() const
+ {
+ if (!is_array())
+ return 0;
+
+ unsigned size = length;
+ const glsl_type *base_type = fields.array;
+
+ while (base_type->is_array()) {
+ size = size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ return size;
+ }
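
For example (a hedged sketch, assuming the array-type cache): an array of three float[4] elements reports 3 * 4 = 12 total elements:

   const glsl_type *inner = glsl_type::get_array_instance(glsl_type::float_type, 4);
   const glsl_type *outer = glsl_type::get_array_instance(inner, 3);
   unsigned total = outer->arrays_of_arrays_size();   /* 12 */
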
+
+ /**
+ * Return the amount of atomic counter storage required for a type.
+ */
+ unsigned atomic_size() const
+ {
+ if (base_type == GLSL_TYPE_ATOMIC_UINT)
+ return ATOMIC_COUNTER_SIZE;
+ else if (is_array())
+ return length * fields.array->atomic_size();
+ else
+ return 0;
+ }
+
+ /**
+ * Return whether a type contains any atomic counters.
+ */
+ bool contains_atomic() const
+ {
+ return atomic_size() > 0;
+ }
+
+ /**
+ * Return whether a type contains any opaque types.
+ */
+ bool contains_opaque() const;
+
+ /**
+ * Query the full type of a matrix row
+ *
+ * \return
+ * If the type is not a matrix, \c glsl_type::error_type is returned.
+ * Otherwise a type matching the rows of the matrix is returned.
+ */
+ const glsl_type *row_type() const
+ {
+ return is_matrix()
+ ? get_instance(base_type, matrix_columns, 1)
+ : error_type;
+ }
+
+ /**
+ * Query the full type of a matrix column
+ *
+ * \return
+ * If the type is not a matrix, \c glsl_type::error_type is returned.
+ * Otherwise a type matching the columns of the matrix is returned.
+ */
+ const glsl_type *column_type() const
+ {
+ return is_matrix()
+ ? get_instance(base_type, vector_elements, 1)
+ : error_type;
+ }
+
+ /**
+ * Get the type of a structure field
+ *
+ * \return
+ * Pointer to the type of the named field. If the type is not a structure
+ * or the named field does not exist, \c glsl_type::error_type is returned.
+ */
+ const glsl_type *field_type(const char *name) const;
+
+ /**
+ * Get the location of a field within a record type
+ */
+ int field_index(const char *name) const;
+
+ /**
+ * Query the number of elements in an array type
+ *
+ * \return
+ * The number of elements in the array for array types or -1 for non-array
+ * types. If the number of elements in the array has not yet been declared,
+ * zero is returned.
+ */
+ int array_size() const
+ {
+ return is_array() ? length : -1;
+ }
+
+ /**
+ * Query whether the array size for all dimensions has been declared.
+ */
+ bool is_unsized_array() const
+ {
+ return is_array() && length == 0;
+ }
+
+ /**
+ * Return the number of coordinate components needed for this
+ * sampler or image type.
+ *
+ * This is based purely on the sampler's dimensionality. For example, this
+ * returns 1 for sampler1D, and 3 for sampler2DArray.
+ *
+ * Note that this is often different from the actual coordinate type used in
+ * a texturing built-in function, since those pack additional values (such
+ * as the shadow comparator or projector) into the coordinate type.
+ */
+ int coordinate_components() const;
+
+ /**
+ * Compare a record type against another record type.
+ *
+ * This is useful for matching record types declared across shader stages.
+ */
+ bool record_compare(const glsl_type *b) const;
+
+ private:
+
+ static mtx_t mutex;
+
+ /**
+ * ralloc context for all glsl_type allocations
+ *
+ * Set on the first call to \c glsl_type::new.
+ */
+ static void *mem_ctx;
+
+ void init_ralloc_type_ctx(void);
+
+ /** Constructor for vector and matrix types */
+ glsl_type(GLenum gl_type,
+ glsl_base_type base_type, unsigned vector_elements,
+ unsigned matrix_columns, const char *name);
+
+ /** Constructor for sampler or image types */
+ glsl_type(GLenum gl_type, glsl_base_type base_type,
+ enum glsl_sampler_dim dim, bool shadow, bool array,
+ unsigned type, const char *name);
+
+ /** Constructor for record types */
+ glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ const char *name);
+
+ /** Constructor for interface types */
+ glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ enum glsl_interface_packing packing, const char *name);
+
++ /** Constructor for function types */
++ glsl_type(const glsl_type *return_type,
++ const glsl_function_param *params, unsigned num_params);
++
+ /** Constructor for array types */
+ glsl_type(const glsl_type *array, unsigned length);
+
+ /** Constructor for subroutine types */
+ glsl_type(const char *name);
+
+ /** Hash table containing the known array types. */
+ static struct hash_table *array_types;
+
+ /** Hash table containing the known record types. */
+ static struct hash_table *record_types;
+
+ /** Hash table containing the known interface types. */
+ static struct hash_table *interface_types;
+
+ /** Hash table containing the known subroutine types. */
+ static struct hash_table *subroutine_types;
+
++ /** Hash table containing the known function types. */
++ static struct hash_table *function_types;
++
+ static bool record_key_compare(const void *a, const void *b);
+ static unsigned record_key_hash(const void *key);
+
+ /**
+ * \name Built-in type flyweights
+ */
+ /*@{*/
+ #undef DECL_TYPE
+ #define DECL_TYPE(NAME, ...) static const glsl_type _##NAME##_type;
+ #undef STRUCT_TYPE
+ #define STRUCT_TYPE(NAME) static const glsl_type _struct_##NAME##_type;
+ #include "compiler/builtin_type_macros.h"
+ /*@}*/
+
+ /**
+ * \name Friend functions.
+ *
+ * These functions are friends because they must have C linkage and they
+ * need to call various private methods or access various private static
+ * data.
+ */
+ /*@{*/
+ friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *);
+ friend void _mesa_glsl_release_types(void);
+ /*@}*/
+ };
+
++#undef DECL_TYPE
++#undef STRUCT_TYPE
++#endif /* __cplusplus */
++
+ struct glsl_struct_field {
+ const struct glsl_type *type;
+ const char *name;
+
+ /**
+ * For interface blocks, gl_varying_slot corresponding to the input/output
+ * if this is a built-in input/output (i.e. a member of the built-in
+ * gl_PerVertex interface block); -1 otherwise.
+ *
+ * Ignored for structs.
+ */
+ int location;
+
+ /**
+ * For interface blocks, the interpolation mode (as in
+ * ir_variable::interpolation). 0 otherwise.
+ */
+ unsigned interpolation:2;
+
+ /**
+ * For interface blocks, 1 if this variable uses centroid interpolation (as
+ * in ir_variable::centroid). 0 otherwise.
+ */
+ unsigned centroid:1;
+
+ /**
+ * For interface blocks, 1 if this variable uses sample interpolation (as
+ * in ir_variable::sample). 0 otherwise.
+ */
+ unsigned sample:1;
+
+ /**
+ * Layout of the matrix. Uses glsl_matrix_layout values.
+ */
+ unsigned matrix_layout:2;
+
+ /**
+ * For interface blocks, 1 if this variable is a per-patch input or output
+ * (as in ir_variable::patch). 0 otherwise.
+ */
+ unsigned patch:1;
+
+ /**
+ * Precision qualifier
+ */
+ unsigned precision:2;
+
+ /**
+ * Image qualifiers, applicable to buffer variables defined in shader
+ * storage buffer objects (SSBOs)
+ */
+ unsigned image_read_only:1;
+ unsigned image_write_only:1;
+ unsigned image_coherent:1;
+ unsigned image_volatile:1;
+ unsigned image_restrict:1;
+
++#ifdef __cplusplus
+ glsl_struct_field(const struct glsl_type *_type, const char *_name)
+ : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
+ sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+ precision(GLSL_PRECISION_NONE)
+ {
+ /* empty */
+ }
+
+ glsl_struct_field()
+ {
+ /* empty */
+ }
++#endif
++};
++
++struct glsl_function_param {
++ const struct glsl_type *type;
++
++ bool in;
++ bool out;
+ };
+
+ static inline unsigned int
+ glsl_align(unsigned int a, unsigned int align)
+ {
+ return (a + align - 1) / align * align;
+ }
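
glsl_align() rounds a byte offset up to the next multiple of the alignment, e.g. (trivial sketch):

   unsigned off = glsl_align(13, 8);   /* 16; already-aligned values are returned unchanged */
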
+
+ #endif /* GLSL_TYPES_H */
--- /dev/null
+ NIR_GENERATED_FILES = \
+ nir_builder_opcodes.h \
+ nir_constant_expressions.c \
+ nir_opcodes.c \
+ nir_opcodes.h \
+ nir_opt_algebraic.c
+
+ NIR_FILES = \
+ glsl_to_nir.cpp \
+ glsl_to_nir.h \
+ nir.c \
+ nir.h \
+ nir_array.h \
+ nir_builder.h \
+ nir_clone.c \
+ nir_constant_expressions.h \
+ nir_control_flow.c \
+ nir_control_flow.h \
+ nir_control_flow_private.h \
+ nir_dominance.c \
+ nir_from_ssa.c \
++ nir_gather_info.c \
+ nir_gs_count_vertices.c \
++ nir_inline_functions.c \
+ nir_intrinsics.c \
+ nir_intrinsics.h \
+ nir_instr_set.c \
+ nir_instr_set.h \
+ nir_liveness.c \
+ nir_lower_alu_to_scalar.c \
+ nir_lower_atomics.c \
+ nir_lower_clip.c \
+ nir_lower_global_vars_to_local.c \
+ nir_lower_gs_intrinsics.c \
++ nir_lower_indirect_derefs.c \
+ nir_lower_load_const_to_scalar.c \
+ nir_lower_locals_to_regs.c \
+ nir_lower_idiv.c \
+ nir_lower_io.c \
+ nir_lower_outputs_to_temporaries.c \
+ nir_lower_phis_to_scalar.c \
++ nir_lower_returns.c \
+ nir_lower_samplers.c \
+ nir_lower_system_values.c \
+ nir_lower_tex.c \
+ nir_lower_to_source_mods.c \
+ nir_lower_two_sided_color.c \
+ nir_lower_vars_to_ssa.c \
+ nir_lower_var_copies.c \
+ nir_lower_vec_to_movs.c \
+ nir_metadata.c \
+ nir_move_vec_src_uses_to_dest.c \
+ nir_normalize_cubemap_coords.c \
+ nir_opt_constant_folding.c \
+ nir_opt_copy_propagate.c \
+ nir_opt_cse.c \
+ nir_opt_dce.c \
+ nir_opt_dead_cf.c \
+ nir_opt_gcm.c \
+ nir_opt_global_to_local.c \
+ nir_opt_peephole_select.c \
+ nir_opt_remove_phis.c \
+ nir_opt_undef.c \
++ nir_phi_builder.c \
++ nir_phi_builder.h \
+ nir_print.c \
+ nir_remove_dead_variables.c \
++ nir_repair_ssa.c \
+ nir_search.c \
+ nir_search.h \
+ nir_split_var_copies.c \
+ nir_sweep.c \
+ nir_to_ssa.c \
+ nir_validate.c \
+ nir_vla.h \
+ nir_worklist.c \
+ nir_worklist.h
+
++SPIRV_FILES = \
++ spirv/nir_spirv.h \
++ spirv/spirv_to_nir.c \
++ spirv/vtn_alu.c \
++ spirv/vtn_cfg.c \
++ spirv/vtn_glsl450.c \
++ spirv/vtn_private.h \
++ spirv/vtn_variables.c
++
--- /dev/null
- nir_visitor(nir_shader *shader);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "glsl_to_nir.h"
+ #include "nir_control_flow.h"
+ #include "nir_builder.h"
+ #include "compiler/glsl/ir_visitor.h"
+ #include "compiler/glsl/ir_hierarchical_visitor.h"
+ #include "compiler/glsl/ir.h"
+ #include "main/imports.h"
+
+ /*
+ * pass to lower GLSL IR to NIR
+ *
+ * This will lower variable dereferences to loads/stores of corresponding
+ * variables in NIR - the variables will be converted to registers in a later
+ * pass.
+ */
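
For orientation, a hypothetical call into this pass might look as follows (shader_prog, the stage and the NIR options are assumed to come from the driver):

   nir_shader *nir = glsl_to_nir(shader_prog, MESA_SHADER_FRAGMENT, options);
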
+
+ namespace {
+
+ class nir_visitor : public ir_visitor
+ {
+ public:
- nir_visitor v1(shader);
++ nir_visitor(nir_shader *shader, gl_shader *sh);
+ ~nir_visitor();
+
+ virtual void visit(ir_variable *);
+ virtual void visit(ir_function *);
+ virtual void visit(ir_function_signature *);
+ virtual void visit(ir_loop *);
+ virtual void visit(ir_if *);
+ virtual void visit(ir_discard *);
+ virtual void visit(ir_loop_jump *);
+ virtual void visit(ir_return *);
+ virtual void visit(ir_call *);
+ virtual void visit(ir_assignment *);
+ virtual void visit(ir_emit_vertex *);
+ virtual void visit(ir_end_primitive *);
+ virtual void visit(ir_expression *);
+ virtual void visit(ir_swizzle *);
+ virtual void visit(ir_texture *);
+ virtual void visit(ir_constant *);
+ virtual void visit(ir_dereference_variable *);
+ virtual void visit(ir_dereference_record *);
+ virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_barrier *);
+
+ void create_function(ir_function_signature *ir);
+
+ private:
+ void add_instr(nir_instr *instr, unsigned num_components);
+ nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
+
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+ nir_ssa_def *src2);
+ nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+ nir_ssa_def *src2, nir_ssa_def *src3);
+
+ bool supports_ints;
+
++ struct gl_shader *sh;
++
+ nir_shader *shader;
+ nir_function_impl *impl;
+ nir_builder b;
+ nir_ssa_def *result; /* result of the expression tree last visited */
+
+ nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
+ /* the head of the dereference chain we're creating */
+ nir_deref_var *deref_head;
+ /* the tail of the dereference chain we're creating */
+ nir_deref *deref_tail;
+
+ nir_variable *var; /* variable created by ir_variable visitor */
+
+ /* whether the IR we're operating on is per-function or global */
+ bool is_global;
+
+ /* map of ir_variable -> nir_variable */
+ struct hash_table *var_table;
+
+ /* map of ir_function_signature -> nir_function_overload */
+ struct hash_table *overload_table;
+ };
+
+ /*
+ * This visitor runs before the main visitor, calling create_function() for
+ * each function so that the main visitor can resolve forward references in
+ * calls.
+ */
+
+ class nir_function_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ nir_function_visitor(nir_visitor *v) : visitor(v)
+ {
+ }
+ virtual ir_visitor_status visit_enter(ir_function *);
+
+ private:
+ nir_visitor *visitor;
+ };
+
+ }; /* end of anonymous namespace */
+
+ nir_shader *
+ glsl_to_nir(const struct gl_shader_program *shader_prog,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
+ {
+ struct gl_shader *sh = shader_prog->_LinkedShaders[stage];
+
+ nir_shader *shader = nir_shader_create(NULL, stage, options);
+
- nir_lower_outputs_to_temporaries(shader);
++ nir_visitor v1(shader, sh);
+ nir_function_visitor v2(&v1);
+ v2.run(sh->ir);
+ visit_exec_list(sh->ir, &v1);
+
-nir_visitor::nir_visitor(nir_shader *shader)
++ nir_function *main = NULL;
++ nir_foreach_function(shader, func) {
++ if (strcmp(func->name, "main") == 0) {
++ main = func;
++ break;
++ }
++ }
++ assert(main);
++
++ nir_lower_outputs_to_temporaries(shader, main);
+
+ shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
+ if (shader_prog->Label)
+ shader->info.label = ralloc_strdup(shader, shader_prog->Label);
+ shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed);
+ shader->info.num_ubos = sh->NumUniformBlocks;
+ shader->info.num_abos = shader_prog->NumAtomicBuffers;
+ shader->info.num_ssbos = sh->NumShaderStorageBlocks;
+ shader->info.num_images = sh->NumImages;
+ shader->info.inputs_read = sh->Program->InputsRead;
+ shader->info.outputs_written = sh->Program->OutputsWritten;
+ shader->info.patch_inputs_read = sh->Program->PatchInputsRead;
+ shader->info.patch_outputs_written = sh->Program->PatchOutputsWritten;
+ shader->info.system_values_read = sh->Program->SystemValuesRead;
+ shader->info.uses_texture_gather = sh->Program->UsesGather;
+ shader->info.uses_clip_distance_out =
+ sh->Program->ClipDistanceArraySize != 0;
+ shader->info.separate_shader = shader_prog->SeparateShader;
+ shader->info.has_transform_feedback_varyings =
+ shader_prog->TransformFeedback.NumVarying > 0;
+
+ switch (stage) {
+ case MESA_SHADER_TESS_CTRL:
+ shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
+ break;
+
+ case MESA_SHADER_GEOMETRY:
+ shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
+ shader->info.gs.output_primitive = sh->Geom.OutputType;
+ shader->info.gs.vertices_out = sh->Geom.VerticesOut;
+ shader->info.gs.invocations = sh->Geom.Invocations;
+ shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive;
+ shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams;
+ break;
+
+ case MESA_SHADER_FRAGMENT: {
+ struct gl_fragment_program *fp =
+ (struct gl_fragment_program *)sh->Program;
+
+ shader->info.fs.uses_discard = fp->UsesKill;
+ shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests;
+ shader->info.fs.depth_layout = fp->FragDepthLayout;
+ break;
+ }
+
+ case MESA_SHADER_COMPUTE: {
+ struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program;
+ shader->info.cs.local_size[0] = cp->LocalSize[0];
+ shader->info.cs.local_size[1] = cp->LocalSize[1];
+ shader->info.cs.local_size[2] = cp->LocalSize[2];
+ break;
+ }
+
+ default:
+ break; /* No stage-specific info */
+ }
+
+ return shader;
+ }
+
- case ir_unop_unpack_half_2x16_split_x:
- result = nir_unpack_half_2x16_split_x(&b, srcs[0]);
- break;
- case ir_unop_unpack_half_2x16_split_y:
- result = nir_unpack_half_2x16_split_y(&b, srcs[0]);
- break;
++nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh)
+ {
+ this->supports_ints = shader->options->native_integers;
+ this->shader = shader;
++ this->sh = sh;
+ this->is_global = true;
+ this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ }
+
+ nir_visitor::~nir_visitor()
+ {
+ _mesa_hash_table_destroy(this->var_table, NULL);
+ _mesa_hash_table_destroy(this->overload_table, NULL);
+ }
+
+ nir_deref_var *
+ nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+ {
+ ir->accept(this);
+ ralloc_steal(mem_ctx, this->deref_head);
+ return this->deref_head;
+ }
+
+ static nir_constant *
+ constant_copy(ir_constant *ir, void *mem_ctx)
+ {
+ if (ir == NULL)
+ return NULL;
+
+ nir_constant *ret = ralloc(mem_ctx, nir_constant);
+
+ unsigned total_elems = ir->type->components();
+ unsigned i;
+
+ ret->num_elements = 0;
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_UINT:
+ for (i = 0; i < total_elems; i++)
+ ret->value.u[i] = ir->value.u[i];
+ break;
+
+ case GLSL_TYPE_INT:
+ for (i = 0; i < total_elems; i++)
+ ret->value.i[i] = ir->value.i[i];
+ break;
+
+ case GLSL_TYPE_FLOAT:
+ for (i = 0; i < total_elems; i++)
+ ret->value.f[i] = ir->value.f[i];
+ break;
+
+ case GLSL_TYPE_BOOL:
+ for (i = 0; i < total_elems; i++)
+ ret->value.b[i] = ir->value.b[i];
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ ret->elements = ralloc_array(mem_ctx, nir_constant *,
+ ir->type->length);
+ ret->num_elements = ir->type->length;
+
+ i = 0;
+ foreach_in_list(ir_constant, field, &ir->components) {
+ ret->elements[i] = constant_copy(field, mem_ctx);
+ i++;
+ }
+ break;
+
+ case GLSL_TYPE_ARRAY:
+ ret->elements = ralloc_array(mem_ctx, nir_constant *,
+ ir->type->length);
+ ret->num_elements = ir->type->length;
+
+ for (i = 0; i < ir->type->length; i++)
+ ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ return ret;
+ }
+
+ void
+ nir_visitor::visit(ir_variable *ir)
+ {
+ nir_variable *var = ralloc(shader, nir_variable);
+ var->type = ir->type;
+ var->name = ralloc_strdup(var, ir->name);
+
+ var->data.read_only = ir->data.read_only;
+ var->data.centroid = ir->data.centroid;
+ var->data.sample = ir->data.sample;
+ var->data.patch = ir->data.patch;
+ var->data.invariant = ir->data.invariant;
+ var->data.location = ir->data.location;
+
+ switch(ir->data.mode) {
+ case ir_var_auto:
+ case ir_var_temporary:
+ if (is_global)
+ var->data.mode = nir_var_global;
+ else
+ var->data.mode = nir_var_local;
+ break;
+
+ case ir_var_function_in:
+ case ir_var_function_out:
+ case ir_var_function_inout:
+ case ir_var_const_in:
+ var->data.mode = nir_var_local;
+ break;
+
+ case ir_var_shader_in:
+ if (shader->stage == MESA_SHADER_FRAGMENT &&
+ ir->data.location == VARYING_SLOT_FACE) {
+ /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
+ var->data.location = SYSTEM_VALUE_FRONT_FACE;
+ var->data.mode = nir_var_system_value;
+ } else if (shader->stage == MESA_SHADER_GEOMETRY &&
+ ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+ /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
+ var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
+ var->data.mode = nir_var_system_value;
+ } else {
+ var->data.mode = nir_var_shader_in;
+ }
+ break;
+
+ case ir_var_shader_out:
+ var->data.mode = nir_var_shader_out;
+ break;
+
+ case ir_var_uniform:
+ var->data.mode = nir_var_uniform;
+ break;
+
+ case ir_var_shader_storage:
+ var->data.mode = nir_var_shader_storage;
+ break;
+
+ case ir_var_system_value:
+ var->data.mode = nir_var_system_value;
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ var->data.interpolation = ir->data.interpolation;
+ var->data.origin_upper_left = ir->data.origin_upper_left;
+ var->data.pixel_center_integer = ir->data.pixel_center_integer;
+ var->data.explicit_location = ir->data.explicit_location;
+ var->data.explicit_index = ir->data.explicit_index;
+ var->data.explicit_binding = ir->data.explicit_binding;
+ var->data.has_initializer = ir->data.has_initializer;
+ var->data.location_frac = ir->data.location_frac;
+ var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
+ var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
+
+ switch (ir->data.depth_layout) {
+ case ir_depth_layout_none:
+ var->data.depth_layout = nir_depth_layout_none;
+ break;
+ case ir_depth_layout_any:
+ var->data.depth_layout = nir_depth_layout_any;
+ break;
+ case ir_depth_layout_greater:
+ var->data.depth_layout = nir_depth_layout_greater;
+ break;
+ case ir_depth_layout_less:
+ var->data.depth_layout = nir_depth_layout_less;
+ break;
+ case ir_depth_layout_unchanged:
+ var->data.depth_layout = nir_depth_layout_unchanged;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ var->data.index = ir->data.index;
++ var->data.descriptor_set = 0;
+ var->data.binding = ir->data.binding;
+ var->data.offset = ir->data.offset;
+ var->data.image.read_only = ir->data.image_read_only;
+ var->data.image.write_only = ir->data.image_write_only;
+ var->data.image.coherent = ir->data.image_coherent;
+ var->data.image._volatile = ir->data.image_volatile;
+ var->data.image.restrict_flag = ir->data.image_restrict;
+ var->data.image.format = ir->data.image_format;
+ var->data.max_array_access = ir->data.max_array_access;
+
+ var->num_state_slots = ir->get_num_state_slots();
+ if (var->num_state_slots > 0) {
+ var->state_slots = ralloc_array(var, nir_state_slot,
+ var->num_state_slots);
+
+ ir_state_slot *state_slots = ir->get_state_slots();
+ for (unsigned i = 0; i < var->num_state_slots; i++) {
+ for (unsigned j = 0; j < 5; j++)
+ var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
+ var->state_slots[i].swizzle = state_slots[i].swizzle;
+ }
+ } else {
+ var->state_slots = NULL;
+ }
+
+ var->constant_initializer = constant_copy(ir->constant_initializer, var);
+
+ var->interface_type = ir->get_interface_type();
+
+ if (var->data.mode == nir_var_local)
+ nir_function_impl_add_variable(impl, var);
+ else
+ nir_shader_add_variable(shader, var);
+
+ _mesa_hash_table_insert(var_table, ir, var);
+ this->var = var;
+ }
+
+ ir_visitor_status
+ nir_function_visitor::visit_enter(ir_function *ir)
+ {
+ foreach_in_list(ir_function_signature, sig, &ir->signatures) {
+ visitor->create_function(sig);
+ }
+ return visit_continue_with_parent;
+ }
+
+ void
+ nir_visitor::create_function(ir_function_signature *ir)
+ {
+ if (ir->is_intrinsic)
+ return;
+
+ nir_function *func = nir_function_create(shader, ir->function_name());
+
+ unsigned num_params = ir->parameters.length();
+ func->num_params = num_params;
+ func->params = ralloc_array(shader, nir_parameter, num_params);
+
+ unsigned i = 0;
+ foreach_in_list(ir_variable, param, &ir->parameters) {
+ switch (param->data.mode) {
+ case ir_var_function_in:
+ func->params[i].param_type = nir_parameter_in;
+ break;
+
+ case ir_var_function_out:
+ func->params[i].param_type = nir_parameter_out;
+ break;
+
+ case ir_var_function_inout:
+ func->params[i].param_type = nir_parameter_inout;
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ func->params[i].type = param->type;
+ i++;
+ }
+
+ func->return_type = ir->return_type;
+
+ _mesa_hash_table_insert(this->overload_table, ir, func);
+ }
+
+ void
+ nir_visitor::visit(ir_function *ir)
+ {
+ foreach_in_list(ir_function_signature, sig, &ir->signatures)
+ sig->accept(this);
+ }
+
+ void
+ nir_visitor::visit(ir_function_signature *ir)
+ {
+ if (ir->is_intrinsic)
+ return;
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->overload_table, ir);
+
+ assert(entry);
+ nir_function *func = (nir_function *) entry->data;
+
+ if (ir->is_defined) {
+ nir_function_impl *impl = nir_function_impl_create(func);
+ this->impl = impl;
+
+ unsigned num_params = func->num_params;
+ impl->num_params = num_params;
+ impl->params = ralloc_array(this->shader, nir_variable *, num_params);
+ unsigned i = 0;
+ foreach_in_list(ir_variable, param, &ir->parameters) {
+ param->accept(this);
+ impl->params[i] = this->var;
+ i++;
+ }
+
+ if (func->return_type == glsl_type::void_type) {
+ impl->return_var = NULL;
+ } else {
+ impl->return_var = ralloc(this->shader, nir_variable);
+ impl->return_var->name = ralloc_strdup(impl->return_var,
+ "return_var");
+ impl->return_var->type = func->return_type;
+ }
+
+ this->is_global = false;
+
+ nir_builder_init(&b, impl);
+ b.cursor = nir_after_cf_list(&impl->body);
+ visit_exec_list(&ir->body, this);
+
+ this->is_global = true;
+ } else {
+ func->impl = NULL;
+ }
+ }
+
+ void
+ nir_visitor::visit(ir_loop *ir)
+ {
+ nir_loop *loop = nir_loop_create(this->shader);
+ nir_builder_cf_insert(&b, &loop->cf_node);
+
+ b.cursor = nir_after_cf_list(&loop->body);
+ visit_exec_list(&ir->body_instructions, this);
+ b.cursor = nir_after_cf_node(&loop->cf_node);
+ }
+
+ void
+ nir_visitor::visit(ir_if *ir)
+ {
+ nir_src condition =
+ nir_src_for_ssa(evaluate_rvalue(ir->condition));
+
+ nir_if *if_stmt = nir_if_create(this->shader);
+ if_stmt->condition = condition;
+ nir_builder_cf_insert(&b, &if_stmt->cf_node);
+
+ b.cursor = nir_after_cf_list(&if_stmt->then_list);
+ visit_exec_list(&ir->then_instructions, this);
+
+ b.cursor = nir_after_cf_list(&if_stmt->else_list);
+ visit_exec_list(&ir->else_instructions, this);
+
+ b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+ }
+
+ void
+ nir_visitor::visit(ir_discard *ir)
+ {
+ /*
+ * discards aren't treated as control flow, because before we lower them
+ * they can appear anywhere in the shader and the stuff after them may still
+ * be executed (yay, crazy GLSL rules!). However, after lowering, all the
+ * discards will be immediately followed by a return.
+ */
+
+ nir_intrinsic_instr *discard;
+ if (ir->condition) {
+ discard = nir_intrinsic_instr_create(this->shader,
+ nir_intrinsic_discard_if);
+ discard->src[0] =
+ nir_src_for_ssa(evaluate_rvalue(ir->condition));
+ } else {
+ discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
+ }
+
+ nir_builder_instr_insert(&b, &discard->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_emit_vertex *ir)
+ {
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
+ instr->const_index[0] = ir->stream_id();
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_end_primitive *ir)
+ {
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
+ instr->const_index[0] = ir->stream_id();
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_loop_jump *ir)
+ {
+ nir_jump_type type;
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ type = nir_jump_break;
+ break;
+ case ir_loop_jump::jump_continue:
+ type = nir_jump_continue;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_return *ir)
+ {
+ if (ir->value != NULL) {
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+ copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
+ }
+
+ nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_call *ir)
+ {
+ if (ir->callee->is_intrinsic) {
+ nir_intrinsic_op op;
+ if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) {
+ op = nir_intrinsic_atomic_counter_read_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) {
+ op = nir_intrinsic_atomic_counter_inc_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) {
+ op = nir_intrinsic_atomic_counter_dec_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) {
+ op = nir_intrinsic_image_load;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) {
+ op = nir_intrinsic_image_store;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) {
+ op = nir_intrinsic_image_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) {
+ op = nir_intrinsic_image_atomic_min;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) {
+ op = nir_intrinsic_image_atomic_max;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) {
+ op = nir_intrinsic_image_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) {
+ op = nir_intrinsic_image_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) {
+ op = nir_intrinsic_image_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) {
+ op = nir_intrinsic_image_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) {
+ op = nir_intrinsic_image_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) {
+ op = nir_intrinsic_memory_barrier;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
+ op = nir_intrinsic_image_size;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+ op = nir_intrinsic_image_samples;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+ op = nir_intrinsic_store_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+ op = nir_intrinsic_load_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) {
+ op = nir_intrinsic_shader_clock;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) {
+ op = nir_intrinsic_group_memory_barrier;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) {
+ op = nir_intrinsic_memory_barrier_atomic_counter;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) {
+ op = nir_intrinsic_memory_barrier_buffer;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) {
+ op = nir_intrinsic_memory_barrier_image;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) {
+ op = nir_intrinsic_memory_barrier_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) {
+ op = nir_intrinsic_load_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) {
+ op = nir_intrinsic_store_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_comp_swap;
+ } else {
+ unreachable("not reached");
+ }
+
+ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+ nir_dest *dest = &instr->dest;
+
+ switch (op) {
+ case nir_intrinsic_atomic_counter_read_var:
+ case nir_intrinsic_atomic_counter_inc_var:
+ case nir_intrinsic_atomic_counter_dec_var: {
+ ir_dereference *param =
+ (ir_dereference *) ir->actual_parameters.get_head();
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_samples:
+ case nir_intrinsic_image_size: {
+ nir_ssa_undef_instr *instr_undef =
+ nir_ssa_undef_instr_create(shader, 1);
+ nir_builder_instr_insert(&b, &instr_undef->instr);
+
+ /* Set the image variable dereference. */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_dereference *image = (ir_dereference *)param;
+ const glsl_type *type =
+ image->variable_referenced()->type->without_array();
+
+ instr->variables[0] = evaluate_deref(&instr->instr, image);
+ param = param->get_next();
+
+ /* Set the intrinsic destination. */
+ if (ir->return_deref) {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[instr->intrinsic];
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ info->dest_components, NULL);
+ }
+
+ if (op == nir_intrinsic_image_size ||
+ op == nir_intrinsic_image_samples) {
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+
+ /* Set the address argument, extending the coordinate vector to four
+ * components.
+ */
+ nir_ssa_def *src_addr =
+ evaluate_rvalue((ir_dereference *)param);
+ nir_ssa_def *srcs[4];
+
+ for (int i = 0; i < 4; i++) {
+ if (i < type->coordinate_components())
+ srcs[i] = nir_channel(&b, src_addr, i);
+ else
+ srcs[i] = &instr_undef->def;
+ }
+
+ instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
+ param = param->get_next();
+
+ /* Set the sample argument, which is undefined for single-sample
+ * images.
+ */
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ instr->src[1] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ } else {
+ instr->src[1] = nir_src_for_ssa(&instr_undef->def);
+ }
+
+ /* Set the intrinsic parameters. */
+ if (!param->is_tail_sentinel()) {
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ instr->src[3] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ }
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ case nir_intrinsic_shader_clock:
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ case nir_intrinsic_store_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
+ instr->const_index[0] = write_mask->value.u[0];
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Set up the destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ /* Insert the created nir instruction now since in the case of boolean
+ * result we will need to emit another instruction after it
+ */
+ nir_builder_instr_insert(&b, &instr->instr);
+
+ /*
+ * In SSBOs/UBOs, a true boolean value is any non-zero value, but we
+ * consider a true boolean to be ~0. Fix this up with a != 0
+ * comparison.
+ */
+ if (type->base_type == GLSL_TYPE_BOOL) {
+ nir_alu_instr *load_ssbo_compare =
+ nir_alu_instr_create(shader, nir_op_ine);
+ load_ssbo_compare->src[0].src.is_ssa = true;
+ load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+ load_ssbo_compare->src[1].src =
+ nir_src_for_ssa(nir_imm_int(&b, 0));
+ for (unsigned i = 0; i < type->vector_elements; i++)
+ load_ssbo_compare->src[1].swizzle[i] = 0;
+ nir_ssa_dest_init(&load_ssbo_compare->instr,
+ &load_ssbo_compare->dest.dest,
+ type->vector_elements, NULL);
+ load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+ nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
+ dest = &load_ssbo_compare->dest.dest;
+ }
+ break;
+ }
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 3 || param_count == 4);
+
+ /* Block index */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* Offset */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 4) {
+ assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_shared: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->const_index[0] = 0;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Set up the destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_store_shared: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->const_index[0] = 0;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 2 || param_count == 3);
+
+ /* Offset */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 3) {
+ assert(op == nir_intrinsic_shared_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ default:
+ unreachable("not reached");
+ }
+
+ if (ir->return_deref) {
+ nir_intrinsic_instr *store_instr =
+ nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+ store_instr->num_components = ir->return_deref->type->vector_elements;
+ store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
+
+ store_instr->variables[0] =
+ evaluate_deref(&store_instr->instr, ir->return_deref);
+ store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
+
+ nir_builder_instr_insert(&b, &store_instr->instr);
+ }
+
+ return;
+ }
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->overload_table, ir->callee);
+ assert(entry);
+ nir_function *callee = (nir_function *) entry->data;
+
+ nir_call_instr *instr = nir_call_instr_create(this->shader, callee);
+
+ unsigned i = 0;
+ foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
+ instr->params[i] = evaluate_deref(&instr->instr, param);
+ i++;
+ }
+
+ instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
+
+ void
+ nir_visitor::visit(ir_assignment *ir)
+ {
+ unsigned num_components = ir->lhs->type->vector_elements;
+
+ if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
+ (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
+ /* We're doing a plain-as-can-be copy, so emit a copy_var */
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+ copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
+
+ if (ir->condition) {
+ nir_if *if_stmt = nir_if_create(this->shader);
+ if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
+ nir_builder_cf_insert(&b, &if_stmt->cf_node);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+ b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+ } else {
+ nir_builder_instr_insert(&b, &copy->instr);
+ }
+ return;
+ }
+
+ assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());
+
+ ir->lhs->accept(this);
+ nir_deref_var *lhs_deref = this->deref_head;
+ nir_ssa_def *src = evaluate_rvalue(ir->rhs);
+
+ if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
+ /* GLSL IR will give us the input to the write-masked assignment in a
+ * single packed vector. So, for example, if the writemask is xzw, then
+ * we have to swizzle x -> x, y -> z, and z -> w and get the y component
+ * from the load.
+ */
+ unsigned swiz[4];
+ unsigned component = 0;
+ for (unsigned i = 0; i < 4; i++) {
+ swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;
+ }
+ src = nir_swizzle(&b, src, swiz, num_components, !supports_ints);
+ }
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+ store->num_components = ir->lhs->type->vector_elements;
+ store->const_index[0] = ir->write_mask;
+ nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
+ store->variables[0] = nir_deref_as_var(store_deref);
+ store->src[0] = nir_src_for_ssa(src);
+
+ if (ir->condition) {
+ nir_if *if_stmt = nir_if_create(this->shader);
+ if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
+ nir_builder_cf_insert(&b, &if_stmt->cf_node);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr);
+ b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+ } else {
+ nir_builder_instr_insert(&b, &store->instr);
+ }
+ }
+
+ /*
+ * Given an instruction, returns a pointer to its destination or NULL if there
+ * is no destination.
+ *
+ * Note that this only handles instructions we generate at this level.
+ */
+ static nir_dest *
+ get_instr_dest(nir_instr *instr)
+ {
+ nir_alu_instr *alu_instr;
+ nir_intrinsic_instr *intrinsic_instr;
+ nir_tex_instr *tex_instr;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ alu_instr = nir_instr_as_alu(instr);
+ return &alu_instr->dest.dest;
+
+ case nir_instr_type_intrinsic:
+ intrinsic_instr = nir_instr_as_intrinsic(instr);
+ if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)
+ return &intrinsic_instr->dest;
+ else
+ return NULL;
+
+ case nir_instr_type_tex:
+ tex_instr = nir_instr_as_tex(instr);
+ return &tex_instr->dest;
+
+ default:
+ unreachable("not reached");
+ }
+
+ return NULL;
+ }
+
+ void
+ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+ {
+ nir_dest *dest = get_instr_dest(instr);
+
+ if (dest)
+ nir_ssa_dest_init(instr, dest, num_components, NULL);
+
+ nir_builder_instr_insert(&b, instr);
+
+ if (dest) {
+ assert(dest->is_ssa);
+ this->result = &dest->ssa;
+ }
+ }
+
+ nir_ssa_def *
+ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
+ {
+ ir->accept(this);
+ if (ir->as_dereference() || ir->as_constant()) {
+ /*
+ * A dereference is being used on the right hand side, which means we
+ * must emit a variable load.
+ */
+
+ nir_intrinsic_instr *load_instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+ load_instr->num_components = ir->type->vector_elements;
+ load_instr->variables[0] = this->deref_head;
+ ralloc_steal(load_instr, load_instr->variables[0]);
+ add_instr(&load_instr->instr, ir->type->vector_elements);
+ }
+
+ return this->result;
+ }
+
+ void
+ nir_visitor::visit(ir_expression *ir)
+ {
+ /* Some special cases */
+ switch (ir->operation) {
+ case ir_binop_ubo_load: {
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
+ load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+ add_instr(&load->instr, ir->type->vector_elements);
+
+ /*
+ * In UBOs, a true boolean value is any non-zero value, but we consider
+ * a true boolean to be ~0. Fix this up with a != 0 comparison.
+ */
+
+ if (ir->type->base_type == GLSL_TYPE_BOOL)
+ this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0));
+
+ return;
+ }
+
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample: {
+ ir_dereference *deref = ir->operands[0]->as_dereference();
+ ir_swizzle *swizzle = NULL;
+ if (!deref) {
+ /* The API does not allow a swizzle here, but the varying packing code
+ * may have pushed one in.
+ */
+ swizzle = ir->operands[0]->as_swizzle();
+ assert(swizzle);
+ deref = swizzle->val->as_dereference();
+ assert(deref);
+ }
+
+ deref->accept(this);
+
+ nir_intrinsic_op op;
+ if (this->deref_head->var->data.mode == nir_var_shader_in) {
+ switch (ir->operation) {
+ case ir_unop_interpolate_at_centroid:
+ op = nir_intrinsic_interp_var_at_centroid;
+ break;
+ case ir_binop_interpolate_at_offset:
+ op = nir_intrinsic_interp_var_at_offset;
+ break;
+ case ir_binop_interpolate_at_sample:
+ op = nir_intrinsic_interp_var_at_sample;
+ break;
+ default:
+ unreachable("Invalid interpolation intrinsic");
+ }
+ } else {
+ /* This case can happen if the vertex shader does not write the
+ * given varying. In this case, the linker will lower it to a
+ * global variable. Since interpolating a variable makes no
+ * sense, we'll just turn it into a load which will probably
+ * eventually end up as an SSA definition.
+ */
+ assert(this->deref_head->var->data.mode == nir_var_global);
+ op = nir_intrinsic_load_var;
+ }
+
+ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
+ intrin->num_components = deref->type->vector_elements;
+ intrin->variables[0] = this->deref_head;
+ ralloc_steal(intrin, intrin->variables[0]);
+
+ if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
+ intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
+ intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+
+ add_instr(&intrin->instr, deref->type->vector_elements);
+
+ if (swizzle) {
+ unsigned swiz[4] = {
+ swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
+ };
+
+ result = nir_swizzle(&b, result, swiz,
+ swizzle->type->vector_elements, false);
+ }
+
+ return;
+ }
+
+ default:
+ break;
+ }
+
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ srcs[i] = evaluate_rvalue(ir->operands[i]);
+
+ glsl_base_type types[4];
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ if (supports_ints)
+ types[i] = ir->operands[i]->type->base_type;
+ else
+ types[i] = GLSL_TYPE_FLOAT;
+
+ glsl_base_type out_type;
+ if (supports_ints)
+ out_type = ir->type->base_type;
+ else
+ out_type = GLSL_TYPE_FLOAT;
+
+ switch (ir->operation) {
+ case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
+ case ir_unop_logic_not:
+ result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
+ break;
+ case ir_unop_neg:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0])
+ : nir_ineg(&b, srcs[0]);
+ break;
+ case ir_unop_abs:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0])
+ : nir_iabs(&b, srcs[0]);
+ break;
+ case ir_unop_saturate:
+ assert(types[0] == GLSL_TYPE_FLOAT);
+ result = nir_fsat(&b, srcs[0]);
+ break;
+ case ir_unop_sign:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fsign(&b, srcs[0])
+ : nir_isign(&b, srcs[0]);
+ break;
+ case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
+ case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
+ case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
+ case ir_unop_exp: unreachable("ir_unop_exp should have been lowered");
+ case ir_unop_log: unreachable("ir_unop_log should have been lowered");
+ case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
+ case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
+ case ir_unop_i2f:
+ result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_u2f:
+ result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_b2f:
+ result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_f2i: result = nir_f2i(&b, srcs[0]); break;
+ case ir_unop_f2u: result = nir_f2u(&b, srcs[0]); break;
+ case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
+ case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
+ case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
+ case ir_unop_i2u:
+ case ir_unop_u2i:
+ case ir_unop_bitcast_i2f:
+ case ir_unop_bitcast_f2i:
+ case ir_unop_bitcast_u2f:
+ case ir_unop_bitcast_f2u:
+ case ir_unop_subroutine_to_int:
+ /* no-op */
+ result = nir_imov(&b, srcs[0]);
+ break;
+ case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
+ case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break;
+ case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
+ case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
+ case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
+ case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break;
+ case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break;
+ case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break;
+ case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break;
+ case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break;
+ case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break;
+ case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
+ case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
+ case ir_unop_pack_snorm_2x16:
+ result = nir_pack_snorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_pack_snorm_4x8:
+ result = nir_pack_snorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_pack_unorm_2x16:
+ result = nir_pack_unorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_pack_unorm_4x8:
+ result = nir_pack_unorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_pack_half_2x16:
+ result = nir_pack_half_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ result = nir_unpack_snorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ result = nir_unpack_snorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ result = nir_unpack_unorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ result = nir_unpack_unorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_half_2x16:
+ result = nir_unpack_half_2x16(&b, srcs[0]);
+ break;
- case ir_binop_pack_half_2x16_split:
- result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]);
- break;
+ case ir_unop_bitfield_reverse:
+ result = nir_bitfield_reverse(&b, srcs[0]);
+ break;
+ case ir_unop_bit_count:
+ result = nir_bit_count(&b, srcs[0]);
+ break;
+ case ir_unop_find_msb:
+ switch (types[0]) {
+ case GLSL_TYPE_UINT:
+ result = nir_ufind_msb(&b, srcs[0]);
+ break;
+ case GLSL_TYPE_INT:
+ result = nir_ifind_msb(&b, srcs[0]);
+ break;
+ default:
+ unreachable("Invalid type for findMSB()");
+ }
+ break;
+ case ir_unop_find_lsb:
+ result = nir_find_lsb(&b, srcs[0]);
+ break;
+
+ case ir_unop_noise:
+ switch (ir->type->vector_elements) {
+ case 1:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise1_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise1_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise1_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise1_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 2:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise2_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise2_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise2_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise2_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 3:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise3_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise3_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise3_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise3_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 4:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise4_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise4_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise4_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise4_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ default:
+ unreachable("not reached");
+ }
+ break;
+ case ir_unop_get_buffer_size: {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+ this->shader,
+ nir_intrinsic_get_buffer_size);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
+ add_instr(&load->instr, ir->type->vector_elements);
+ return;
+ }
+
+ case ir_binop_add:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1])
+ : nir_iadd(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_sub:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1])
+ : nir_isub(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_mul:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1])
+ : nir_imul(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_div:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fdiv(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_idiv(&b, srcs[0], srcs[1]);
+ else
+ result = nir_udiv(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_mod:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmod(&b, srcs[0], srcs[1])
+ : nir_umod(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_min:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fmin(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_imin(&b, srcs[0], srcs[1]);
+ else
+ result = nir_umin(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_max:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fmax(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_imax(&b, srcs[0], srcs[1]);
+ else
+ result = nir_umax(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
+ case ir_binop_logic_and:
+ result = supports_ints ? nir_iand(&b, srcs[0], srcs[1])
+ : nir_fand(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_logic_or:
+ result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
+ : nir_for(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_logic_xor:
+ result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+ : nir_fxor(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
+ case ir_binop_rshift:
+ result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1])
+ : nir_ushr(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_imul_high:
+ result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
+ : nir_umul_high(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break;
+ case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
+ case ir_binop_less:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_flt(&b, srcs[0], srcs[1]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ilt(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ult(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_slt(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_greater:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_flt(&b, srcs[1], srcs[0]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ilt(&b, srcs[1], srcs[0]);
+ else
+ result = nir_ult(&b, srcs[1], srcs[0]);
+ } else {
+ result = nir_slt(&b, srcs[1], srcs[0]);
+ }
+ break;
+ case ir_binop_lequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fge(&b, srcs[1], srcs[0]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ige(&b, srcs[1], srcs[0]);
+ else
+ result = nir_uge(&b, srcs[1], srcs[0]);
+ } else {
+ result = nir_sge(&b, srcs[1], srcs[0]);
+ }
+ break;
+ case ir_binop_gequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fge(&b, srcs[0], srcs[1]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ige(&b, srcs[0], srcs[1]);
+ else
+ result = nir_uge(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_sge(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_equal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_feq(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ieq(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_seq(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_nequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fne(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ine(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_sne(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_all_equal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT) {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_seq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ break;
+ case ir_binop_any_nequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT) {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_sne(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ break;
+ case ir_binop_dot:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ break;
+
+ case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
+ case ir_triop_fma:
+ result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_lrp:
+ result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_csel:
+ if (supports_ints)
+ result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
+ else
+ result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_bitfield_extract:
+ result = (out_type == GLSL_TYPE_INT) ?
+ nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
+ nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_quadop_bitfield_insert:
+ result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]);
+ break;
+ case ir_quadop_vector:
+ result = nir_vec(&b, srcs, ir->type->vector_elements);
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+ }
+
+ void
+ nir_visitor::visit(ir_swizzle *ir)
+ {
+ unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
+ result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
+ ir->type->vector_elements, !supports_ints);
+ }
+
+ void
+ nir_visitor::visit(ir_texture *ir)
+ {
+ unsigned num_srcs;
+ nir_texop op;
+ switch (ir->op) {
+ case ir_tex:
+ op = nir_texop_tex;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_txb:
+ case ir_txl:
+ op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
+ num_srcs = 2; /* coordinate, bias/lod */
+ break;
+
+ case ir_txd:
+ op = nir_texop_txd; /* coordinate, dPdx, dPdy */
+ num_srcs = 3;
+ break;
+
+ case ir_txf:
+ op = nir_texop_txf;
+ if (ir->lod_info.lod != NULL)
+ num_srcs = 2; /* coordinate, lod */
+ else
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_txf_ms:
+ op = nir_texop_txf_ms;
+ num_srcs = 2; /* coordinate, sample_index */
+ break;
+
+ case ir_txs:
+ op = nir_texop_txs;
+ if (ir->lod_info.lod != NULL)
+ num_srcs = 1; /* lod */
+ else
+ num_srcs = 0;
+ break;
+
+ case ir_lod:
+ op = nir_texop_lod;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_tg4:
+ op = nir_texop_tg4;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_query_levels:
+ op = nir_texop_query_levels;
+ num_srcs = 0;
+ break;
+
+ case ir_texture_samples:
+ op = nir_texop_texture_samples;
+ num_srcs = 0;
+ break;
+
+ case ir_samples_identical:
+ op = nir_texop_samples_identical;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ if (ir->projector != NULL)
+ num_srcs++;
+ if (ir->shadow_comparitor != NULL)
+ num_srcs++;
+ if (ir->offset != NULL && ir->offset->as_constant() == NULL)
+ num_srcs++;
+
+ nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
+
+ instr->op = op;
+ instr->sampler_dim =
+ (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
+ instr->is_array = ir->sampler->type->sampler_array;
+ instr->is_shadow = ir->sampler->type->sampler_shadow;
+ if (instr->is_shadow)
+ instr->is_new_style_shadow = (ir->type->vector_elements == 1);
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ instr->dest_type = nir_type_float;
+ break;
+ case GLSL_TYPE_INT:
+ instr->dest_type = nir_type_int;
+ break;
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_UINT:
+ instr->dest_type = nir_type_uint;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
+
+ unsigned src_number = 0;
+
+ if (ir->coordinate != NULL) {
+ instr->coord_components = ir->coordinate->type->vector_elements;
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
+ instr->src[src_number].src_type = nir_tex_src_coord;
+ src_number++;
+ }
+
+ if (ir->projector != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->projector));
+ instr->src[src_number].src_type = nir_tex_src_projector;
+ src_number++;
+ }
+
+ if (ir->shadow_comparitor != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor));
+ instr->src[src_number].src_type = nir_tex_src_comparitor;
+ src_number++;
+ }
+
+ if (ir->offset != NULL) {
+ /* we don't support multiple offsets yet */
+ assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
+
+ ir_constant *const_offset = ir->offset->as_constant();
+ if (const_offset != NULL) {
+ for (unsigned i = 0; i < const_offset->type->vector_elements; i++)
+ instr->const_offset[i] = const_offset->value.i[i];
+ } else {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->offset));
+ instr->src[src_number].src_type = nir_tex_src_offset;
+ src_number++;
+ }
+ }
+
+ switch (ir->op) {
+ case ir_txb:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
+ instr->src[src_number].src_type = nir_tex_src_bias;
+ src_number++;
+ break;
+
+ case ir_txl:
+ case ir_txf:
+ case ir_txs:
+ if (ir->lod_info.lod != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
+ instr->src[src_number].src_type = nir_tex_src_lod;
+ src_number++;
+ }
+ break;
+
+ case ir_txd:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
+ instr->src[src_number].src_type = nir_tex_src_ddx;
+ src_number++;
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
+ instr->src[src_number].src_type = nir_tex_src_ddy;
+ src_number++;
+ break;
+
+ case ir_txf_ms:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
+ instr->src[src_number].src_type = nir_tex_src_ms_index;
+ src_number++;
+ break;
+
+ case ir_tg4:
+ instr->component = ir->lod_info.component->as_constant()->value.u[0];
+ break;
+
+ default:
+ break;
+ }
+
+ assert(src_number == num_srcs);
+
+ add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+ }
+
+ void
+ nir_visitor::visit(ir_constant *ir)
+ {
+ /*
+ * We don't know if this variable is an array or struct that gets
+ * dereferenced, so do the safe thing and make it a variable with a
+ * constant initializer and return a dereference.
+ */
+
+ nir_variable *var =
+ nir_local_variable_create(this->impl, ir->type, "const_temp");
+ var->data.read_only = true;
+ var->constant_initializer = constant_copy(ir, var);
+
+ this->deref_head = nir_deref_var_create(this->shader, var);
+ this->deref_tail = &this->deref_head->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_dereference_variable *ir)
+ {
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->var_table, ir->var);
+ assert(entry);
+ nir_variable *var = (nir_variable *) entry->data;
+
+ nir_deref_var *deref = nir_deref_var_create(this->shader, var);
+ this->deref_head = deref;
+ this->deref_tail = &deref->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_dereference_record *ir)
+ {
+ ir->record->accept(this);
+
+ int field_index = this->deref_tail->type->field_index(ir->field);
+ assert(field_index >= 0);
+
+ nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
+ deref->deref.type = ir->type;
+ this->deref_tail->child = &deref->deref;
+ this->deref_tail = &deref->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_dereference_array *ir)
+ {
+ nir_deref_array *deref = nir_deref_array_create(this->shader);
+ deref->deref.type = ir->type;
+
+ ir_constant *const_index = ir->array_index->as_constant();
+ if (const_index != NULL) {
+ deref->deref_array_type = nir_deref_array_type_direct;
+ deref->base_offset = const_index->value.u[0];
+ } else {
+ deref->deref_array_type = nir_deref_array_type_indirect;
+ deref->indirect =
+ nir_src_for_ssa(evaluate_rvalue(ir->array_index));
+ }
+
+ ir->array->accept(this);
+
+ this->deref_tail->child = &deref->deref;
+ ralloc_steal(this->deref_tail, deref);
+ this->deref_tail = &deref->deref;
+ }
+
+ void
+ nir_visitor::visit(ir_barrier *ir)
+ {
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
+ nir_builder_instr_insert(&b, &instr->instr);
+ }
--- /dev/null
-nir_function_impl_create(nir_function *function)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include "nir_control_flow_private.h"
+ #include <assert.h>
+
+ nir_shader *
+ nir_shader_create(void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
+ {
+ nir_shader *shader = ralloc(mem_ctx, nir_shader);
+
+ exec_list_make_empty(&shader->uniforms);
+ exec_list_make_empty(&shader->inputs);
+ exec_list_make_empty(&shader->outputs);
++ exec_list_make_empty(&shader->shared);
+
+ shader->options = options;
+ memset(&shader->info, 0, sizeof(shader->info));
+
+ exec_list_make_empty(&shader->functions);
+ exec_list_make_empty(&shader->registers);
+ exec_list_make_empty(&shader->globals);
+ exec_list_make_empty(&shader->system_values);
+ shader->reg_alloc = 0;
+
+ shader->num_inputs = 0;
+ shader->num_outputs = 0;
+ shader->num_uniforms = 0;
++ shader->num_shared = 0;
+
+ shader->stage = stage;
+
+ return shader;
+ }
+
+ static nir_register *
+ reg_create(void *mem_ctx, struct exec_list *list)
+ {
+ nir_register *reg = ralloc(mem_ctx, nir_register);
+
+ list_inithead(&reg->uses);
+ list_inithead(&reg->defs);
+ list_inithead(&reg->if_uses);
+
+ reg->num_components = 0;
+ reg->num_array_elems = 0;
+ reg->is_packed = false;
+ reg->name = NULL;
+
+ exec_list_push_tail(list, &reg->node);
+
+ return reg;
+ }
+
+ nir_register *
+ nir_global_reg_create(nir_shader *shader)
+ {
+ nir_register *reg = reg_create(shader, &shader->registers);
+ reg->index = shader->reg_alloc++;
+ reg->is_global = true;
+
+ return reg;
+ }
+
+ nir_register *
+ nir_local_reg_create(nir_function_impl *impl)
+ {
+ nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
+ reg->index = impl->reg_alloc++;
+ reg->is_global = false;
+
+ return reg;
+ }
+
+ void
+ nir_reg_remove(nir_register *reg)
+ {
+ exec_node_remove(&reg->node);
+ }
+
+ void
+ nir_shader_add_variable(nir_shader *shader, nir_variable *var)
+ {
+ switch (var->data.mode) {
+ case nir_var_all:
+ assert(!"invalid mode");
+ break;
+
+ case nir_var_local:
+ assert(!"nir_shader_add_variable cannot be used for local variables");
+ break;
+
+ case nir_var_global:
+ exec_list_push_tail(&shader->globals, &var->node);
+ break;
+
+ case nir_var_shader_in:
+ exec_list_push_tail(&shader->inputs, &var->node);
+ break;
+
+ case nir_var_shader_out:
+ exec_list_push_tail(&shader->outputs, &var->node);
+ break;
+
+ case nir_var_uniform:
+ case nir_var_shader_storage:
+ exec_list_push_tail(&shader->uniforms, &var->node);
+ break;
+
++ case nir_var_shared:
++ assert(shader->stage == MESA_SHADER_COMPUTE);
++ exec_list_push_tail(&shader->shared, &var->node);
++ break;
++
+ case nir_var_system_value:
+ exec_list_push_tail(&shader->system_values, &var->node);
+ break;
+ }
+ }
+
+ nir_variable *
+ nir_variable_create(nir_shader *shader, nir_variable_mode mode,
+ const struct glsl_type *type, const char *name)
+ {
+ nir_variable *var = rzalloc(shader, nir_variable);
+ var->name = ralloc_strdup(var, name);
+ var->type = type;
+ var->data.mode = mode;
+
+ if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
+ (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
+ var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+
+ if (mode == nir_var_shader_in || mode == nir_var_uniform)
+ var->data.read_only = true;
+
+ nir_shader_add_variable(shader, var);
+
+ return var;
+ }
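+
+ /* Editor's illustrative sketch (not part of this patch): with the new
+ * nir_var_shared mode handled above, a compute-stage shader could declare a
+ * workgroup-shared array roughly like this. The shader is assumed to be a
+ * MESA_SHADER_COMPUTE shader, and glsl_array_type()/glsl_float_type() are
+ * assumed to be the usual glsl_types helpers:
+ *
+ *    nir_variable *scratch =
+ *       nir_variable_create(shader, nir_var_shared,
+ *                           glsl_array_type(glsl_float_type(), 64),
+ *                           "shared_scratch");
+ */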
+
+ nir_variable *
+ nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type, const char *name)
+ {
+ nir_variable *var = rzalloc(impl->function->shader, nir_variable);
+ var->name = ralloc_strdup(var, name);
+ var->type = type;
+ var->data.mode = nir_var_local;
+
+ nir_function_impl_add_variable(impl, var);
+
+ return var;
+ }
+
+ nir_function *
+ nir_function_create(nir_shader *shader, const char *name)
+ {
+ nir_function *func = ralloc(shader, nir_function);
+
+ exec_list_push_tail(&shader->functions, &func->node);
+
+ func->name = ralloc_strdup(func, name);
+ func->shader = shader;
+ func->num_params = 0;
+ func->params = NULL;
+ func->return_type = glsl_void_type();
+ func->impl = NULL;
+
+ return func;
+ }
+
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
+ {
+ dest->is_ssa = src->is_ssa;
+ if (src->is_ssa) {
+ dest->ssa = src->ssa;
+ } else {
+ dest->reg.base_offset = src->reg.base_offset;
+ dest->reg.reg = src->reg.reg;
+ if (src->reg.indirect) {
+ dest->reg.indirect = ralloc(mem_ctx, nir_src);
+ nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+ } else {
+ dest->reg.indirect = NULL;
+ }
+ }
+ }
+
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
+ {
+ /* Copying an SSA definition makes no sense whatsoever. */
+ assert(!src->is_ssa);
+
+ dest->is_ssa = false;
+
+ dest->reg.base_offset = src->reg.base_offset;
+ dest->reg.reg = src->reg.reg;
+ if (src->reg.indirect) {
+ dest->reg.indirect = ralloc(instr, nir_src);
+ nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
+ } else {
+ dest->reg.indirect = NULL;
+ }
+ }
+
+ void
+ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr)
+ {
+ nir_src_copy(&dest->src, &src->src, &instr->instr);
+ dest->abs = src->abs;
+ dest->negate = src->negate;
+ for (unsigned i = 0; i < 4; i++)
+ dest->swizzle[i] = src->swizzle[i];
+ }
+
+ void
+ nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr)
+ {
+ nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
+ dest->write_mask = src->write_mask;
+ dest->saturate = src->saturate;
+ }
+
+
+ static void
+ cf_init(nir_cf_node *node, nir_cf_node_type type)
+ {
+ exec_node_init(&node->node);
+ node->parent = NULL;
+ node->type = type;
+ }
+
+ nir_function_impl *
- assert(function->impl == NULL);
-
- void *mem_ctx = ralloc_parent(function);
-
- nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl);
++nir_function_impl_create_bare(nir_shader *shader)
+ {
- function->impl = impl;
- impl->function = function;
++ nir_function_impl *impl = ralloc(shader, nir_function_impl);
+
- nir_block *start_block = nir_block_create(mem_ctx);
- nir_block *end_block = nir_block_create(mem_ctx);
++ impl->function = NULL;
+
+ cf_init(&impl->cf_node, nir_cf_node_function);
+
+ exec_list_make_empty(&impl->body);
+ exec_list_make_empty(&impl->registers);
+ exec_list_make_empty(&impl->locals);
+ impl->num_params = 0;
+ impl->params = NULL;
+ impl->return_var = NULL;
+ impl->reg_alloc = 0;
+ impl->ssa_alloc = 0;
+ impl->valid_metadata = nir_metadata_none;
+
+ /* create start & end blocks */
- instr->sampler_array_size = 0;
++ nir_block *start_block = nir_block_create(shader);
++ nir_block *end_block = nir_block_create(shader);
+ start_block->cf_node.parent = &impl->cf_node;
+ end_block->cf_node.parent = &impl->cf_node;
+ impl->end_block = end_block;
+
+ exec_list_push_tail(&impl->body, &start_block->cf_node.node);
+
+ start_block->successors[0] = end_block;
+ _mesa_set_add(end_block->predecessors, start_block);
+ return impl;
+ }
+
++nir_function_impl *
++nir_function_impl_create(nir_function *function)
++{
++ assert(function->impl == NULL);
++
++ nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
++
++ function->impl = impl;
++ impl->function = function;
++
++ impl->num_params = function->num_params;
++ impl->params = ralloc_array(function->shader,
++ nir_variable *, impl->num_params);
++
++ return impl;
++}
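++
++/* Editor's illustrative sketch (not part of this change): a new function
++ * normally gets its implementation through the wrapper above, which also
++ * allocates impl->params to match the function's parameter count, e.g.:
++ *
++ *    nir_function *func = nir_function_create(shader, "main");
++ *    nir_function_impl *impl = nir_function_impl_create(func);
++ */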
++
+ nir_block *
+ nir_block_create(nir_shader *shader)
+ {
+ nir_block *block = ralloc(shader, nir_block);
+
+ cf_init(&block->cf_node, nir_cf_node_block);
+
+ block->successors[0] = block->successors[1] = NULL;
+ block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ block->imm_dom = NULL;
+ /* XXX maybe it would be worth it to defer allocation? That
+ * way it wouldn't get allocated for shader refs that never run
+ * nir_calc_dominance. For example, the state tracker creates an
+ * initial IR, clones that, runs the appropriate lowering passes,
+ * passes it to the driver (which does common lowering/opt), and then
+ * stores a ref that is later used to do state-specific lowering and
+ * further opt. Do any of the references not need dominance metadata?
+ */
+ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ exec_list_make_empty(&block->instr_list);
+
+ return block;
+ }
+
+ static inline void
+ src_init(nir_src *src)
+ {
+ src->is_ssa = false;
+ src->reg.reg = NULL;
+ src->reg.indirect = NULL;
+ src->reg.base_offset = 0;
+ }
+
+ nir_if *
+ nir_if_create(nir_shader *shader)
+ {
+ nir_if *if_stmt = ralloc(shader, nir_if);
+
+ cf_init(&if_stmt->cf_node, nir_cf_node_if);
+ src_init(&if_stmt->condition);
+
+ nir_block *then = nir_block_create(shader);
+ exec_list_make_empty(&if_stmt->then_list);
+ exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
+ then->cf_node.parent = &if_stmt->cf_node;
+
+ nir_block *else_stmt = nir_block_create(shader);
+ exec_list_make_empty(&if_stmt->else_list);
+ exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
+ else_stmt->cf_node.parent = &if_stmt->cf_node;
+
+ return if_stmt;
+ }
+
+ nir_loop *
+ nir_loop_create(nir_shader *shader)
+ {
+ nir_loop *loop = ralloc(shader, nir_loop);
+
+ cf_init(&loop->cf_node, nir_cf_node_loop);
+
+ nir_block *body = nir_block_create(shader);
+ exec_list_make_empty(&loop->body);
+ exec_list_push_tail(&loop->body, &body->cf_node.node);
+ body->cf_node.parent = &loop->cf_node;
+
+ body->successors[0] = body;
+ _mesa_set_add(body->predecessors, body);
+
+ return loop;
+ }
+
+ static void
+ instr_init(nir_instr *instr, nir_instr_type type)
+ {
+ instr->type = type;
+ instr->block = NULL;
+ exec_node_init(&instr->node);
+ }
+
+ static void
+ dest_init(nir_dest *dest)
+ {
+ dest->is_ssa = false;
+ dest->reg.reg = NULL;
+ dest->reg.indirect = NULL;
+ dest->reg.base_offset = 0;
+ }
+
+ static void
+ alu_dest_init(nir_alu_dest *dest)
+ {
+ dest_init(&dest->dest);
+ dest->saturate = false;
+ dest->write_mask = 0xf;
+ }
+
+ static void
+ alu_src_init(nir_alu_src *src)
+ {
+ src_init(&src->src);
+ src->abs = src->negate = false;
+ src->swizzle[0] = 0;
+ src->swizzle[1] = 1;
+ src->swizzle[2] = 2;
+ src->swizzle[3] = 3;
+ }
+
+ nir_alu_instr *
+ nir_alu_instr_create(nir_shader *shader, nir_op op)
+ {
+ unsigned num_srcs = nir_op_infos[op].num_inputs;
+ nir_alu_instr *instr =
+ ralloc_size(shader,
+ sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
+
+ instr_init(&instr->instr, nir_instr_type_alu);
+ instr->op = op;
+ alu_dest_init(&instr->dest);
+ for (unsigned i = 0; i < num_srcs; i++)
+ alu_src_init(&instr->src[i]);
+
+ return instr;
+ }
+
+ nir_jump_instr *
+ nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
+ {
+ nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
+ instr_init(&instr->instr, nir_instr_type_jump);
+ instr->type = type;
+ return instr;
+ }
+
+ nir_load_const_instr *
+ nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
+ {
+ nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
+ instr_init(&instr->instr, nir_instr_type_load_const);
+
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+ return instr;
+ }
+
+ nir_intrinsic_instr *
+ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
+ nir_intrinsic_instr *instr =
+ ralloc_size(shader,
+ sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
+
+ instr_init(&instr->instr, nir_instr_type_intrinsic);
+ instr->intrinsic = op;
+
+ if (nir_intrinsic_infos[op].has_dest)
+ dest_init(&instr->dest);
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ src_init(&instr->src[i]);
+
+ return instr;
+ }
+
+ nir_call_instr *
+ nir_call_instr_create(nir_shader *shader, nir_function *callee)
+ {
+ nir_call_instr *instr = ralloc(shader, nir_call_instr);
+ instr_init(&instr->instr, nir_instr_type_call);
+
+ instr->callee = callee;
+ instr->num_params = callee->num_params;
+ instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
+ instr->return_deref = NULL;
+
+ return instr;
+ }
+
+ nir_tex_instr *
+ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
+ {
+ nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
+ instr_init(&instr->instr, nir_instr_type_tex);
+
+ dest_init(&instr->dest);
+
+ instr->num_srcs = num_srcs;
+ instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
+ for (unsigned i = 0; i < num_srcs; i++)
+ src_init(&instr->src[i].src);
+
++ instr->texture_index = 0;
++ instr->texture_array_size = 0;
++ instr->texture = NULL;
+ instr->sampler_index = 0;
+ instr->sampler = NULL;
+
+ return instr;
+ }
+
+ nir_phi_instr *
+ nir_phi_instr_create(nir_shader *shader)
+ {
+ nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
+ instr_init(&instr->instr, nir_instr_type_phi);
+
+ dest_init(&instr->dest);
+ exec_list_make_empty(&instr->srcs);
+ return instr;
+ }
+
+ nir_parallel_copy_instr *
+ nir_parallel_copy_instr_create(nir_shader *shader)
+ {
+ nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
+ instr_init(&instr->instr, nir_instr_type_parallel_copy);
+
+ exec_list_make_empty(&instr->entries);
+
+ return instr;
+ }
+
+ nir_ssa_undef_instr *
+ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
+ {
+ nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
+ instr_init(&instr->instr, nir_instr_type_ssa_undef);
+
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+ return instr;
+ }
+
+ nir_deref_var *
+ nir_deref_var_create(void *mem_ctx, nir_variable *var)
+ {
+ nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var);
+ deref->deref.deref_type = nir_deref_type_var;
+ deref->deref.child = NULL;
+ deref->deref.type = var->type;
+ deref->var = var;
+ return deref;
+ }
+
+ nir_deref_array *
+ nir_deref_array_create(void *mem_ctx)
+ {
+ nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array);
+ deref->deref.deref_type = nir_deref_type_array;
+ deref->deref.child = NULL;
+ deref->deref_array_type = nir_deref_array_type_direct;
+ src_init(&deref->indirect);
+ deref->base_offset = 0;
+ return deref;
+ }
+
+ nir_deref_struct *
+ nir_deref_struct_create(void *mem_ctx, unsigned field_index)
+ {
+ nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct);
+ deref->deref.deref_type = nir_deref_type_struct;
+ deref->deref.child = NULL;
+ deref->index = field_index;
+ return deref;
+ }
+
+ static nir_deref_var *
+ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
+ {
+ nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
+ ret->deref.type = deref->deref.type;
+ if (deref->deref.child)
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
+ return ret;
+ }
+
+ static nir_deref_array *
+ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
+ {
+ nir_deref_array *ret = nir_deref_array_create(mem_ctx);
+ ret->base_offset = deref->base_offset;
+ ret->deref_array_type = deref->deref_array_type;
+ if (deref->deref_array_type == nir_deref_array_type_indirect) {
+ nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx);
+ }
+ ret->deref.type = deref->deref.type;
+ if (deref->deref.child)
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
+ return ret;
+ }
+
+ static nir_deref_struct *
+ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
+ {
+ nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
+ ret->deref.type = deref->deref.type;
+ if (deref->deref.child)
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
+ return ret;
+ }
+
+ nir_deref *
+ nir_copy_deref(void *mem_ctx, nir_deref *deref)
+ {
+ switch (deref->deref_type) {
+ case nir_deref_type_var:
+ return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref;
+ case nir_deref_type_array:
+ return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref;
+ case nir_deref_type_struct:
+ return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref;
+ default:
+ unreachable("Invalid dereference type");
+ }
+
+ return NULL;
+ }
+
+ /* Returns a load_const instruction that represents the constant
+ * initializer for the given deref chain. The caller is responsible for
+ * ensuring that there actually is a constant initializer.
+ */
+ nir_load_const_instr *
+ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
+ {
+ nir_constant *constant = deref->var->constant_initializer;
+ assert(constant);
+
+ const nir_deref *tail = &deref->deref;
+ unsigned matrix_offset = 0;
+ while (tail->child) {
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(tail->child);
+ assert(arr->deref_array_type == nir_deref_array_type_direct);
+ if (glsl_type_is_matrix(tail->type)) {
+ assert(arr->deref.child == NULL);
+ matrix_offset = arr->base_offset;
+ } else {
+ constant = constant->elements[arr->base_offset];
+ }
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ constant = constant->elements[nir_deref_as_struct(tail->child)->index];
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref child type");
+ }
+
+ tail = tail->child;
+ }
+
+ nir_load_const_instr *load =
+ nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type));
+
+ matrix_offset *= load->def.num_components;
+ for (unsigned i = 0; i < load->def.num_components; i++) {
+ switch (glsl_get_base_type(tail->type)) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ load->value.u[i] = constant->value.u[matrix_offset + i];
+ break;
+ case GLSL_TYPE_BOOL:
+ load->value.u[i] = constant->value.b[matrix_offset + i] ?
+ NIR_TRUE : NIR_FALSE;
+ break;
+ default:
+ unreachable("Invalid immediate type");
+ }
+ }
+
+ return load;
+ }
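+
+ /* Editor's usage sketch (illustrative, not part of this patch): a pass that
+ * wants to materialize a constant-initialized variable as an SSA value might
+ * do something like the following, where "deref" is assumed to point at a
+ * variable known to carry a constant_initializer and "cursor" is a valid
+ * nir_cursor:
+ *
+ *    nir_load_const_instr *load =
+ *       nir_deref_get_const_initializer_load(shader, deref);
+ *    nir_instr_insert(cursor, &load->instr);
+ *    nir_ssa_def *value = &load->def;
+ */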
+
+ nir_function_impl *
+ nir_cf_node_get_function(nir_cf_node *node)
+ {
+ while (node->type != nir_cf_node_function) {
+ node = node->parent;
+ }
+
+ return nir_cf_node_as_function(node);
+ }
+
++/* Reduces a cursor to a canonical form by preferring "after" positions and
++ * lifting it to block granularity when possible.
++ */
++static nir_cursor
++reduce_cursor(nir_cursor cursor)
++{
++ switch (cursor.option) {
++ case nir_cursor_before_block:
++ if (exec_list_is_empty(&cursor.block->instr_list)) {
++ /* Empty block. After is as good as before. */
++ cursor.option = nir_cursor_after_block;
++ } else {
++ /* Try to switch to after the previous block if there is one.
++ * (This isn't likely, but it can happen.)
++ */
++ nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node);
++ if (prev_node && prev_node->type == nir_cf_node_block) {
++ cursor.block = nir_cf_node_as_block(prev_node);
++ cursor.option = nir_cursor_after_block;
++ }
++ }
++ return cursor;
++
++ case nir_cursor_after_block:
++ return cursor;
++
++ case nir_cursor_before_instr: {
++ nir_instr *prev_instr = nir_instr_prev(cursor.instr);
++ if (prev_instr) {
++ /* Before this instruction is the same as after the previous one. */
++ cursor.instr = prev_instr;
++ cursor.option = nir_cursor_after_instr;
++ } else {
++ /* No previous instruction. Switch to before block */
++ cursor.block = cursor.instr->block;
++ cursor.option = nir_cursor_before_block;
++ }
++ return reduce_cursor(cursor);
++ }
++
++ case nir_cursor_after_instr:
++ if (nir_instr_next(cursor.instr) == NULL) {
++ /* This is the last instruction, switch to after block */
++ cursor.option = nir_cursor_after_block;
++ cursor.block = cursor.instr->block;
++ }
++ return cursor;
++
++ default:
++ unreachable("Inavlid cursor option");
++ }
++}
++
++bool
++nir_cursors_equal(nir_cursor a, nir_cursor b)
++{
++ /* Reduced cursors should be unique */
++ a = reduce_cursor(a);
++ b = reduce_cursor(b);
++
++ return a.block == b.block && a.option == b.option;
++}
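++
++/* Editor's illustrative sketch (not part of this change): because
++ * reduce_cursor() canonicalizes positions, two cursors that denote the same
++ * insertion point compare equal even if they were built differently. For
++ * example, assuming "instr" is the last instruction of "block" and
++ * nir_after_instr()/nir_after_block() are the cursor helpers from nir.h:
++ *
++ *    assert(nir_cursors_equal(nir_after_instr(instr),
++ *                             nir_after_block(block)));
++ */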
++
+ static bool
+ add_use_cb(nir_src *src, void *state)
+ {
+ nir_instr *instr = state;
+
+ src->parent_instr = instr;
+ list_addtail(&src->use_link,
+ src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
+
+ return true;
+ }
+
+ static bool
+ add_ssa_def_cb(nir_ssa_def *def, void *state)
+ {
+ nir_instr *instr = state;
+
+ if (instr->block && def->index == UINT_MAX) {
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&instr->block->cf_node);
+
+ def->index = impl->ssa_alloc++;
+ }
+
+ return true;
+ }
+
+ static bool
+ add_reg_def_cb(nir_dest *dest, void *state)
+ {
+ nir_instr *instr = state;
+
+ if (!dest->is_ssa) {
+ dest->reg.parent_instr = instr;
+ list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
+ }
+
+ return true;
+ }
+
+ static void
+ add_defs_uses(nir_instr *instr)
+ {
+ nir_foreach_src(instr, add_use_cb, instr);
+ nir_foreach_dest(instr, add_reg_def_cb, instr);
+ nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
+ }
+
+ void
+ nir_instr_insert(nir_cursor cursor, nir_instr *instr)
+ {
+ switch (cursor.option) {
+ case nir_cursor_before_block:
+ /* Only allow inserting jumps into empty blocks. */
+ if (instr->type == nir_instr_type_jump)
+ assert(exec_list_is_empty(&cursor.block->instr_list));
+
+ instr->block = cursor.block;
+ add_defs_uses(instr);
+ exec_list_push_head(&cursor.block->instr_list, &instr->node);
+ break;
+ case nir_cursor_after_block: {
+ /* Inserting instructions after a jump is illegal. */
+ nir_instr *last = nir_block_last_instr(cursor.block);
+ assert(last == NULL || last->type != nir_instr_type_jump);
+ (void) last;
+
+ instr->block = cursor.block;
+ add_defs_uses(instr);
+ exec_list_push_tail(&cursor.block->instr_list, &instr->node);
+ break;
+ }
+ case nir_cursor_before_instr:
+ assert(instr->type != nir_instr_type_jump);
+ instr->block = cursor.instr->block;
+ add_defs_uses(instr);
+ exec_node_insert_node_before(&cursor.instr->node, &instr->node);
+ break;
+ case nir_cursor_after_instr:
+ /* Inserting instructions after a jump is illegal. */
+ assert(cursor.instr->type != nir_instr_type_jump);
+
+ /* Only allow inserting jumps at the end of the block. */
+ if (instr->type == nir_instr_type_jump)
+ assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
+
+ instr->block = cursor.instr->block;
+ add_defs_uses(instr);
+ exec_node_insert_after(&cursor.instr->node, &instr->node);
+ break;
+ }
+
+ if (instr->type == nir_instr_type_jump)
+ nir_handle_add_jump(instr->block);
+ }
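+
+ /* Editor's usage sketch (illustrative, not part of this patch): with the
+ * cursor API above, a pass can drop a freshly built ALU instruction right
+ * before an existing one. "old_instr" is assumed to be an existing
+ * nir_instr, "some_def" an existing nir_ssa_def, "shader" the containing
+ * nir_shader, and nir_before_instr() the cursor helper from nir.h:
+ *
+ *    nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+ *    mov->src[0].src = nir_src_for_ssa(some_def);
+ *    nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ *                      some_def->num_components, NULL);
+ *    nir_instr_insert(nir_before_instr(old_instr), &mov->instr);
+ */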
+
+ static bool
+ src_is_valid(const nir_src *src)
+ {
+ return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
+ }
+
+ static bool
+ remove_use_cb(nir_src *src, void *state)
+ {
+ if (src_is_valid(src))
+ list_del(&src->use_link);
+
+ return true;
+ }
+
+ static bool
+ remove_def_cb(nir_dest *dest, void *state)
+ {
+ if (!dest->is_ssa)
+ list_del(&dest->reg.def_link);
+
+ return true;
+ }
+
+ static void
+ remove_defs_uses(nir_instr *instr)
+ {
+ nir_foreach_dest(instr, remove_def_cb, instr);
+ nir_foreach_src(instr, remove_use_cb, instr);
+ }
+
+ void nir_instr_remove(nir_instr *instr)
+ {
+ remove_defs_uses(instr);
+ exec_node_remove(&instr->node);
+
+ if (instr->type == nir_instr_type_jump) {
+ nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+ nir_handle_remove_jump(instr->block, jump_instr->type);
+ }
+ }
+
+ /*@}*/
+
+ void
+ nir_index_local_regs(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ reg->index = index++;
+ }
+ impl->reg_alloc = index;
+ }
+
+ void
+ nir_index_global_regs(nir_shader *shader)
+ {
+ unsigned index = 0;
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ reg->index = index++;
+ }
+ shader->reg_alloc = index;
+ }
+
+ static bool
+ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
+ {
+ return cb(&instr->dest.dest, state);
+ }
+
+ static bool
+ visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
+ void *state)
+ {
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ return cb(&instr->dest, state);
+
+ return true;
+ }
+
+ static bool
+ visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
+ void *state)
+ {
+ return cb(&instr->dest, state);
+ }
+
+ static bool
+ visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
+ {
+ return cb(&instr->dest, state);
+ }
+
+ static bool
+ visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
+ nir_foreach_dest_cb cb, void *state)
+ {
+ nir_foreach_parallel_copy_entry(instr, entry) {
+ if (!cb(&entry->dest, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ bool
+ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+ case nir_instr_type_intrinsic:
+ return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
+ case nir_instr_type_tex:
+ return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
+ case nir_instr_type_phi:
+ return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
+ case nir_instr_type_parallel_copy:
+ return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
+ cb, state);
+
+ case nir_instr_type_load_const:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ break;
+
+ default:
+ unreachable("Invalid instruction type");
+ break;
+ }
+
+ return true;
+ }
+
+ struct foreach_ssa_def_state {
+ nir_foreach_ssa_def_cb cb;
+ void *client_state;
+ };
+
+ static inline bool
+ nir_ssa_def_visitor(nir_dest *dest, void *void_state)
+ {
+ struct foreach_ssa_def_state *state = void_state;
+
+ if (dest->is_ssa)
+ return state->cb(&dest->ssa, state->client_state);
+ else
+ return true;
+ }
+
+ bool
+ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_tex:
+ case nir_instr_type_intrinsic:
+ case nir_instr_type_phi:
+ case nir_instr_type_parallel_copy: {
+ struct foreach_ssa_def_state foreach_state = {cb, state};
+ return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
+ }
+
+ case nir_instr_type_load_const:
+ return cb(&nir_instr_as_load_const(instr)->def, state);
+ case nir_instr_type_ssa_undef:
+ return cb(&nir_instr_as_ssa_undef(instr)->def, state);
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ return true;
+ default:
+ unreachable("Invalid instruction type");
+ }
+ }
+
+ static bool
+ visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
+ {
+ if (!cb(src, state))
+ return false;
+ if (!src->is_ssa && src->reg.indirect)
+ return cb(src->reg.indirect, state);
+ return true;
+ }
+
+ static bool
+ visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb,
+ void *state)
+ {
+ if (deref->deref_array_type == nir_deref_array_type_indirect)
+ return visit_src(&deref->indirect, cb, state);
+ return true;
+ }
+
+ static bool
+ visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state)
+ {
+ nir_deref *cur = &deref->deref;
+ while (cur != NULL) {
+ if (cur->deref_type == nir_deref_type_array)
+ if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state))
+ return false;
+
+ cur = cur->child;
+ }
+
+ return true;
+ }
+
+ static bool
+ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ if (!visit_src(&instr->src[i].src, cb, state))
+ return false;
+
+ return true;
+ }
+
+ static bool
+ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ for (unsigned i = 0; i < instr->num_srcs; i++)
+ if (!visit_src(&instr->src[i].src, cb, state))
+ return false;
+
++ if (instr->texture != NULL)
++ if (!visit_deref_src(instr->texture, cb, state))
++ return false;
++
+ if (instr->sampler != NULL)
+ if (!visit_deref_src(instr->sampler, cb, state))
+ return false;
+
+ return true;
+ }
+
+ static bool
+ visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
+ void *state)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ for (unsigned i = 0; i < num_srcs; i++)
+ if (!visit_src(&instr->src[i], cb, state))
+ return false;
+
+ unsigned num_vars =
+ nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++)
+ if (!visit_deref_src(instr->variables[i], cb, state))
+ return false;
+
+ return true;
+ }
+
+ static bool
+ visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ return true;
+ }
+
+ static bool
+ visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb,
+ void *state)
+ {
+ return true;
+ }
+
+ static bool
+ visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ nir_foreach_phi_src(instr, src) {
+ if (!visit_src(&src->src, cb, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ static bool
+ visit_parallel_copy_src(nir_parallel_copy_instr *instr,
+ nir_foreach_src_cb cb, void *state)
+ {
+ nir_foreach_parallel_copy_entry(instr, entry) {
+ if (!visit_src(&entry->src, cb, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ typedef struct {
+ void *state;
+ nir_foreach_src_cb cb;
+ } visit_dest_indirect_state;
+
+ static bool
+ visit_dest_indirect(nir_dest *dest, void *_state)
+ {
+ visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;
+
+ if (!dest->is_ssa && dest->reg.indirect)
+ return state->cb(dest->reg.indirect, state->state);
+
+ return true;
+ }
+
+ bool
+ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_intrinsic:
+ if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_tex:
+ if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_call:
+ if (!visit_call_src(nir_instr_as_call(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_load_const:
+ if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_phi:
+ if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
+ return false;
+ break;
+ case nir_instr_type_parallel_copy:
+ if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
+ cb, state))
+ return false;
+ break;
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ return true;
+
+ default:
+ unreachable("Invalid instruction type");
+ break;
+ }
+
+ visit_dest_indirect_state dest_state;
+ dest_state.state = state;
+ dest_state.cb = cb;
+ return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
+ }
+
+ nir_const_value *
+ nir_src_as_const_value(nir_src src)
+ {
+ if (!src.is_ssa)
+ return NULL;
+
+ if (src.ssa->parent_instr->type != nir_instr_type_load_const)
+ return NULL;
+
+ nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
+
+ return &load->value;
+ }
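+
+ /* Editor's illustrative sketch (not part of this patch): a typical caller
+ * uses nir_src_as_const_value() to special-case operands that are known at
+ * compile time. "src" is a placeholder for whatever nir_src the caller is
+ * inspecting:
+ *
+ *    nir_const_value *val = nir_src_as_const_value(src);
+ *    if (val != NULL) {
+ *       // constant path: val->u[0], val->i[0], val->f[0], ... are usable
+ *    } else {
+ *       // non-constant path: fall back to the SSA or register source
+ *    }
+ */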
+
+ /**
+ * Returns true if the source is known to be dynamically uniform. Otherwise it
+ * returns false, which means the source may or may not be dynamically
+ * uniform but it can't be determined.
+ */
+ bool
+ nir_src_is_dynamically_uniform(nir_src src)
+ {
+ if (!src.is_ssa)
+ return false;
+
+ /* Constants are trivially dynamically uniform */
+ if (src.ssa->parent_instr->type == nir_instr_type_load_const)
+ return true;
+
+ /* As are uniform variables */
+ if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
+
+ if (intr->intrinsic == nir_intrinsic_load_uniform)
+ return true;
+ }
+
+ /* XXX: this could have many more tests, such as when a sampler function is
+ * called with dynamically uniform arguments.
+ */
+ return false;
+ }
+
+ static void
+ src_remove_all_uses(nir_src *src)
+ {
+ for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
+ if (!src_is_valid(src))
+ continue;
+
+ list_del(&src->use_link);
+ }
+ }
+
+ static void
+ src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+ {
+ for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
+ if (!src_is_valid(src))
+ continue;
+
+ if (parent_instr) {
+ src->parent_instr = parent_instr;
+ if (src->is_ssa)
+ list_addtail(&src->use_link, &src->ssa->uses);
+ else
+ list_addtail(&src->use_link, &src->reg.reg->uses);
+ } else {
+ assert(parent_if);
+ src->parent_if = parent_if;
+ if (src->is_ssa)
+ list_addtail(&src->use_link, &src->ssa->if_uses);
+ else
+ list_addtail(&src->use_link, &src->reg.reg->if_uses);
+ }
+ }
+ }
+
+ void
+ nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
+ {
+ assert(!src_is_valid(src) || src->parent_instr == instr);
+
+ src_remove_all_uses(src);
+ *src = new_src;
+ src_add_all_uses(src, instr, NULL);
+ }
+
+ void
+ nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
+ {
+ assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
+
+ src_remove_all_uses(dest);
+ src_remove_all_uses(src);
+ *dest = *src;
+ *src = NIR_SRC_INIT;
+ src_add_all_uses(dest, dest_instr, NULL);
+ }
+
+ void
+ nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
+ {
+ nir_src *src = &if_stmt->condition;
+ assert(!src_is_valid(src) || src->parent_if == if_stmt);
+
+ src_remove_all_uses(src);
+ *src = new_src;
+ src_add_all_uses(src, NULL, if_stmt);
+ }
+
+ void
+ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
+ {
+ if (dest->is_ssa) {
+ /* We can only overwrite an SSA destination if it has no uses. */
+ assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
+ } else {
+ list_del(&dest->reg.def_link);
+ if (dest->reg.indirect)
+ src_remove_all_uses(dest->reg.indirect);
+ }
+
+ /* We can't rewrite with an SSA def. */
+ assert(!new_dest.is_ssa);
+
+ nir_dest_copy(dest, &new_dest, instr);
+
+ dest->reg.parent_instr = instr;
+ list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
+
+ if (dest->reg.indirect)
+ src_add_all_uses(dest->reg.indirect, instr, NULL);
+ }
+
+ void
+ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+ unsigned num_components, const char *name)
+ {
+ def->name = name;
+ def->parent_instr = instr;
+ list_inithead(&def->uses);
+ list_inithead(&def->if_uses);
+ def->num_components = num_components;
+
+ if (instr->block) {
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&instr->block->cf_node);
+
+ def->index = impl->ssa_alloc++;
+ } else {
+ def->index = UINT_MAX;
+ }
+ }
+
+ void
+ nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+ unsigned num_components, const char *name)
+ {
+ dest->is_ssa = true;
+ nir_ssa_def_init(instr, &dest->ssa, num_components, name);
+ }
+
+ void
+ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
+ {
+ assert(!new_src.is_ssa || def != new_src.ssa);
+
+ nir_foreach_use_safe(def, use_src)
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+ }
+
+ static bool
+ is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
+ {
+ assert(start->block == end->block);
+
+ if (between->block != start->block)
+ return false;
+
+ /* Search backwards looking for "between" */
+ while (start != end) {
+ if (between == end)
+ return true;
+
+ end = nir_instr_prev(end);
+ assert(end);
+ }
+
+ return false;
+ }
+
+ /* Replaces all uses of the given SSA def with the given source but only if
+ * the use comes after the after_me instruction. This can be useful if you
+ * are emitting code to fix up the result of some instruction: you can freely
+ * use the result in that code and then call rewrite_uses_after and pass the
+ * last fixup instruction as after_me and it will replace all of the uses you
+ * want without touching the fixup code.
+ *
+ * This function assumes that after_me is in the same block as
+ * def->parent_instr and that after_me comes after def->parent_instr.
+ */
+ void
+ nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me)
+ {
+ assert(!new_src.is_ssa || def != new_src.ssa);
+
+ nir_foreach_use_safe(def, use_src) {
+ assert(use_src->parent_instr != def->parent_instr);
+ /* Since def already dominates all of its uses, the only way a use can
+ * not be dominated by after_me is if it is between def and after_me in
+ * the instruction list.
+ */
+ if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+ }
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+ }
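+
+ /* A minimal usage sketch (the names `old_def` and `new_def` are
+ * hypothetical, not part of this patch): after emitting fixup code whose
+ * final result is `new_def`, replace every other use of `old_def` with it
+ * while leaving the fixup code itself untouched:
+ *
+ * nir_ssa_def_rewrite_uses_after(old_def, nir_src_for_ssa(new_def),
+ * new_def->parent_instr);
+ */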
+
+ static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ bool reverse, void *state);
+
+ static inline bool
+ foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state)
+ {
+ if (reverse) {
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node,
+ &if_stmt->else_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node,
+ &if_stmt->then_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ } else {
+ foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+
+ foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ static inline bool
+ foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state)
+ {
+ if (reverse) {
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ } else {
+ foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
+ if (!foreach_cf_node(node, cb, reverse, state))
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ static bool
+ foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ bool reverse, void *state)
+ {
+ switch (node->type) {
+ case nir_cf_node_block:
+ return cb(nir_cf_node_as_block(node), state);
+ case nir_cf_node_if:
+ return foreach_if(nir_cf_node_as_if(node), cb, reverse, state);
+ case nir_cf_node_loop:
+ return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state);
+ break;
+
+ default:
+ unreachable("Invalid CFG node type");
+ break;
+ }
+
+ return false;
+ }
+
+ bool
+ nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state)
+ {
+ return foreach_cf_node(node, cb, false, state);
+ }
+
+ bool
+ nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
+ {
+ foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
+ if (!foreach_cf_node(node, cb, false, state))
+ return false;
+ }
+
+ return cb(impl->end_block, state);
+ }
+
+ bool
+ nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state)
+ {
+ if (!cb(impl->end_block, state))
+ return false;
+
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) {
+ if (!foreach_cf_node(node, cb, true, state))
+ return false;
+ }
+
+ return true;
+ }
+
+ nir_if *
+ nir_block_get_following_if(nir_block *block)
+ {
+ if (exec_node_is_tail_sentinel(&block->cf_node.node))
+ return NULL;
+
+ if (nir_cf_node_is_last(&block->cf_node))
+ return NULL;
+
+ nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+ if (next_node->type != nir_cf_node_if)
+ return NULL;
+
+ return nir_cf_node_as_if(next_node);
+ }
+
+ nir_loop *
+ nir_block_get_following_loop(nir_block *block)
+ {
+ if (exec_node_is_tail_sentinel(&block->cf_node.node))
+ return NULL;
+
+ if (nir_cf_node_is_last(&block->cf_node))
+ return NULL;
+
+ nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+ if (next_node->type != nir_cf_node_loop)
+ return NULL;
+
+ return nir_cf_node_as_loop(next_node);
+ }
+
+ static bool
+ index_block(nir_block *block, void *state)
+ {
+ unsigned *index = state;
+ block->index = (*index)++;
+ return true;
+ }
+
+ void
+ nir_index_blocks(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+
+ if (impl->valid_metadata & nir_metadata_block_index)
+ return;
+
+ nir_foreach_block(impl, index_block, &index);
+
+ impl->num_blocks = index;
+ }
+
+ static bool
+ index_ssa_def_cb(nir_ssa_def *def, void *state)
+ {
+ unsigned *index = (unsigned *) state;
+ def->index = (*index)++;
+
+ return true;
+ }
+
+ static bool
+ index_ssa_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr(block, instr)
+ nir_foreach_ssa_def(instr, index_ssa_def_cb, state);
+
+ return true;
+ }
+
+ /**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+ void
+ nir_index_ssa_defs(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+ nir_foreach_block(impl, index_ssa_block, &index);
+ impl->ssa_alloc = index;
+ }
+
+ static bool
+ index_instrs_block(nir_block *block, void *state)
+ {
+ unsigned *index = state;
+ nir_foreach_instr(block, instr)
+ instr->index = (*index)++;
+
+ return true;
+ }
+
+ /**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+ unsigned
+ nir_index_instrs(nir_function_impl *impl)
+ {
+ unsigned index = 0;
+ nir_foreach_block(impl, index_instrs_block, &index);
+ return index;
+ }
+
+ nir_intrinsic_op
+ nir_intrinsic_from_system_value(gl_system_value val)
+ {
+ switch (val) {
+ case SYSTEM_VALUE_VERTEX_ID:
+ return nir_intrinsic_load_vertex_id;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ return nir_intrinsic_load_instance_id;
+ case SYSTEM_VALUE_DRAW_ID:
+ return nir_intrinsic_load_draw_id;
+ case SYSTEM_VALUE_BASE_INSTANCE:
+ return nir_intrinsic_load_base_instance;
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ return nir_intrinsic_load_vertex_id_zero_base;
+ case SYSTEM_VALUE_BASE_VERTEX:
+ return nir_intrinsic_load_base_vertex;
+ case SYSTEM_VALUE_INVOCATION_ID:
+ return nir_intrinsic_load_invocation_id;
+ case SYSTEM_VALUE_FRONT_FACE:
+ return nir_intrinsic_load_front_face;
+ case SYSTEM_VALUE_SAMPLE_ID:
+ return nir_intrinsic_load_sample_id;
+ case SYSTEM_VALUE_SAMPLE_POS:
+ return nir_intrinsic_load_sample_pos;
+ case SYSTEM_VALUE_SAMPLE_MASK_IN:
+ return nir_intrinsic_load_sample_mask_in;
+ case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+ return nir_intrinsic_load_local_invocation_id;
+ case SYSTEM_VALUE_WORK_GROUP_ID:
+ return nir_intrinsic_load_work_group_id;
+ case SYSTEM_VALUE_NUM_WORK_GROUPS:
+ return nir_intrinsic_load_num_work_groups;
+ case SYSTEM_VALUE_PRIMITIVE_ID:
+ return nir_intrinsic_load_primitive_id;
+ case SYSTEM_VALUE_TESS_COORD:
+ return nir_intrinsic_load_tess_coord;
+ case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+ return nir_intrinsic_load_tess_level_outer;
+ case SYSTEM_VALUE_TESS_LEVEL_INNER:
+ return nir_intrinsic_load_tess_level_inner;
+ case SYSTEM_VALUE_VERTICES_IN:
+ return nir_intrinsic_load_patch_vertices_in;
+ case SYSTEM_VALUE_HELPER_INVOCATION:
+ return nir_intrinsic_load_helper_invocation;
+ default:
+ unreachable("system value does not directly correspond to intrinsic");
+ }
+ }
+
+ gl_system_value
+ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
+ {
+ switch (intrin) {
+ case nir_intrinsic_load_vertex_id:
+ return SYSTEM_VALUE_VERTEX_ID;
+ case nir_intrinsic_load_instance_id:
+ return SYSTEM_VALUE_INSTANCE_ID;
+ case nir_intrinsic_load_draw_id:
+ return SYSTEM_VALUE_DRAW_ID;
+ case nir_intrinsic_load_base_instance:
+ return SYSTEM_VALUE_BASE_INSTANCE;
+ case nir_intrinsic_load_vertex_id_zero_base:
+ return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+ case nir_intrinsic_load_base_vertex:
+ return SYSTEM_VALUE_BASE_VERTEX;
+ case nir_intrinsic_load_invocation_id:
+ return SYSTEM_VALUE_INVOCATION_ID;
+ case nir_intrinsic_load_front_face:
+ return SYSTEM_VALUE_FRONT_FACE;
+ case nir_intrinsic_load_sample_id:
+ return SYSTEM_VALUE_SAMPLE_ID;
+ case nir_intrinsic_load_sample_pos:
+ return SYSTEM_VALUE_SAMPLE_POS;
+ case nir_intrinsic_load_sample_mask_in:
+ return SYSTEM_VALUE_SAMPLE_MASK_IN;
+ case nir_intrinsic_load_local_invocation_id:
+ return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+ case nir_intrinsic_load_num_work_groups:
+ return SYSTEM_VALUE_NUM_WORK_GROUPS;
+ case nir_intrinsic_load_work_group_id:
+ return SYSTEM_VALUE_WORK_GROUP_ID;
+ case nir_intrinsic_load_primitive_id:
+ return SYSTEM_VALUE_PRIMITIVE_ID;
+ case nir_intrinsic_load_tess_coord:
+ return SYSTEM_VALUE_TESS_COORD;
+ case nir_intrinsic_load_tess_level_outer:
+ return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+ case nir_intrinsic_load_tess_level_inner:
+ return SYSTEM_VALUE_TESS_LEVEL_INNER;
+ case nir_intrinsic_load_patch_vertices_in:
+ return SYSTEM_VALUE_VERTICES_IN;
+ case nir_intrinsic_load_helper_invocation:
+ return SYSTEM_VALUE_HELPER_INVOCATION;
+ default:
+ unreachable("intrinsic doesn't produce a system value");
+ }
+ }
--- /dev/null
-#define NIR_SRC_INIT (nir_src) { { NULL } }
+ /*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #pragma once
+
+ #include "util/hash_table.h"
+ #include "compiler/glsl/list.h"
+ #include "GL/gl.h" /* GLenum */
+ #include "util/list.h"
+ #include "util/ralloc.h"
+ #include "util/set.h"
+ #include "util/bitset.h"
+ #include "compiler/nir_types.h"
+ #include "compiler/shader_enums.h"
+ #include <stdio.h>
+
+ #include "nir_opcodes.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ struct gl_program;
+ struct gl_shader_program;
+
+ #define NIR_FALSE 0u
+ #define NIR_TRUE (~0u)
+
+ /** Defines a cast function
+ *
+ * This macro defines a cast function from in_type to out_type where
+ * out_type is some structure type that contains a field of type in_type.
+ *
+ * Note that you have to be a bit careful as the generated cast function
+ * destroys constness.
+ */
+ #define NIR_DEFINE_CAST(name, in_type, out_type, field) \
+ static inline out_type * \
+ name(const in_type *parent) \
+ { \
+ return exec_node_data(out_type, parent, field); \
+ }
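+
+ /* For example (a sketch of the expansion; the concrete casts are defined
+ * later in this file), NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr,
+ * nir_alu_instr, instr) expands to:
+ *
+ * static inline nir_alu_instr *
+ * nir_instr_as_alu(const nir_instr *parent)
+ * {
+ * return exec_node_data(nir_alu_instr, parent, instr);
+ * }
+ */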
+
+ struct nir_function;
+ struct nir_shader;
+ struct nir_instr;
+
+
+ /**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa nir_variable::state_slots
+ */
+ typedef struct {
+ int tokens[5];
+ int swizzle;
+ } nir_state_slot;
+
+ typedef enum {
+ nir_var_all = -1,
+ nir_var_shader_in,
+ nir_var_shader_out,
+ nir_var_global,
+ nir_var_local,
+ nir_var_uniform,
+ nir_var_shader_storage,
++ nir_var_shared,
+ nir_var_system_value
+ } nir_variable_mode;
+
+ /**
+ * Data stored in an nir_constant
+ */
+ union nir_constant_data {
+ unsigned u[16];
+ int i[16];
+ float f[16];
+ bool b[16];
+ };
+
+ typedef struct nir_constant {
+ /**
+ * Value of the constant.
+ *
+ * The field used to back the values supplied by the constant is determined
+ * by the type associated with the \c nir_variable. Constants may be
+ * scalars, vectors, or matrices.
+ */
+ union nir_constant_data value;
+
+ /* We could get this from var->type, but not having to care about the type
+ * makes clone *much* easier.
+ */
+ unsigned num_elements;
+
+ /* Array elements / Structure Fields */
+ struct nir_constant **elements;
+ } nir_constant;
+
+ /**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+ typedef enum {
+ nir_depth_layout_none, /**< No depth layout is specified. */
+ nir_depth_layout_any,
+ nir_depth_layout_greater,
+ nir_depth_layout_less,
+ nir_depth_layout_unchanged
+ } nir_depth_layout;
+
+ /**
+ * Either a uniform, global variable, shader input, or shader output. Based on
+ * ir_variable - it should be easy to translate between the two.
+ */
+
+ typedef struct nir_variable {
+ struct exec_node node;
+
+ /**
+ * Declared type of the variable
+ */
+ const struct glsl_type *type;
+
+ /**
+ * Declared name of the variable
+ */
+ char *name;
+
+ struct nir_variable_data {
+
+ /**
+ * Is the variable read-only?
+ *
+ * This is set for variables declared as \c const, shader inputs,
+ * and uniforms.
+ */
+ unsigned read_only:1;
+ unsigned centroid:1;
+ unsigned sample:1;
+ unsigned patch:1;
+ unsigned invariant:1;
+
+ /**
+ * Storage class of the variable.
+ *
+ * \sa nir_variable_mode
+ */
+ nir_variable_mode mode:4;
+
+ /**
+ * Interpolation mode for shader inputs / outputs
+ *
+ * \sa glsl_interp_qualifier
+ */
+ unsigned interpolation:2;
+
+ /**
+ * \name ARB_fragment_coord_conventions
+ * @{
+ */
+ unsigned origin_upper_left:1;
+ unsigned pixel_center_integer:1;
+ /*@}*/
+
+ /**
+ * Was the location explicitly set in the shader?
+ *
+ * If the location is explicitly set in the shader, it \b cannot be changed
+ * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+ * no effect).
+ */
+ unsigned explicit_location:1;
+ unsigned explicit_index:1;
+
+ /**
+ * Was an initial binding explicitly set in the shader?
+ *
+ * If so, constant_initializer contains an integer nir_constant
+ * representing the initial binding point.
+ */
+ unsigned explicit_binding:1;
+
+ /**
+ * Does this variable have an initializer?
+ *
+ * This is used by the linker to cross-validate initializers of global
+ * variables.
+ */
+ unsigned has_initializer:1;
+
+ /**
+ * If non-zero, then this variable may be packed along with other variables
+ * into a single varying slot, so this offset should be applied when
+ * accessing components. For example, an offset of 1 means that the x
+ * component of this variable is actually stored in component y of the
+ * location specified by \c location.
+ */
+ unsigned location_frac:2;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was not an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_array will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_nonarray:1;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_nonarray will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_array:1;
+
+ /**
+ * \brief Layout qualifier for gl_FragDepth.
+ *
+ * This is not equal to \c ir_depth_layout_none if and only if this
+ * variable is \c gl_FragDepth and a layout qualifier is specified.
+ */
+ nir_depth_layout depth_layout;
+
+ /**
+ * Storage location of the base of this variable
+ *
+ * The precise meaning of this field depends on the nature of the variable.
+ *
+ * - Vertex shader input: one of the values from \c gl_vert_attrib.
+ * - Vertex shader output: one of the values from \c gl_varying_slot.
+ * - Geometry shader input: one of the values from \c gl_varying_slot.
+ * - Geometry shader output: one of the values from \c gl_varying_slot.
+ * - Fragment shader input: one of the values from \c gl_varying_slot.
+ * - Fragment shader output: one of the values from \c gl_frag_result.
+ * - Uniforms: Per-stage uniform slot number for default uniform block.
+ * - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: uniform slot number.
+ * - Other: This field is not currently used.
+ *
+ * If the variable is a uniform, shader input, or shader output, and the
+ * slot has not been assigned, the value will be -1.
+ */
+ int location;
+
+ /**
+ * The actual location of the variable in the IR. Only valid for inputs
+ * and outputs.
+ */
+ unsigned int driver_location;
+
+ /**
+ * output index for dual source blending.
+ */
+ int index;
+
++ /**
++ * Descriptor set binding for sampler or UBO.
++ */
++ int descriptor_set;
++
+ /**
+ * Initial binding point for a sampler or UBO.
+ *
+ * For array types, this represents the binding point for the first element.
+ */
+ int binding;
+
+ /**
+ * Location an atomic counter is stored at.
+ */
+ unsigned offset;
+
+ /**
+ * ARB_shader_image_load_store qualifiers.
+ */
+ struct {
+ bool read_only; /**< "readonly" qualifier. */
+ bool write_only; /**< "writeonly" qualifier. */
+ bool coherent;
+ bool _volatile;
+ bool restrict_flag;
+
+ /** Image internal format if specified explicitly, otherwise GL_NONE. */
+ GLenum format;
+ } image;
+
+ /**
+ * Highest element accessed with a constant expression array index
+ *
+ * Not used for non-array variables.
+ */
+ unsigned max_array_access;
+
+ } data;
+
+ /**
+ * Built-in state that backs this uniform
+ *
+ * Once set at variable creation, \c state_slots must remain invariant.
+ * This is because, ideally, this array would be shared by all clones of
+ * this variable in the IR tree. In other words, we'd really like for it
+ * to be a fly-weight.
+ *
+ * If the variable is not a uniform, \c num_state_slots will be zero and
+ * \c state_slots will be \c NULL.
+ */
+ /*@{*/
+ unsigned num_state_slots; /**< Number of state slots used */
+ nir_state_slot *state_slots; /**< State descriptors. */
+ /*@}*/
+
+ /**
+ * Constant expression assigned in the initializer of the variable
+ */
+ nir_constant *constant_initializer;
+
+ /**
+ * For variables that are in an interface block or are an instance of an
+ * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+ *
+ * \sa ir_variable::location
+ */
+ const struct glsl_type *interface_type;
+ } nir_variable;
+
+ #define nir_foreach_variable(var, var_list) \
+ foreach_list_typed(nir_variable, var, node, var_list)
+
++/**
++ * Returns the bits in the inputs_read, outputs_written, or
++ * system_values_read bitfield corresponding to this variable.
++ */
++static inline uint64_t
++nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage)
++{
++ assert(var->data.mode == nir_var_shader_in ||
++ var->data.mode == nir_var_shader_out ||
++ var->data.mode == nir_var_system_value);
++ assert(var->data.location >= 0);
++
++ const struct glsl_type *var_type = var->type;
++ if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) {
++ /* Most geometry shader inputs are per-vertex arrays */
++ if (var->data.location >= VARYING_SLOT_VAR0)
++ assert(glsl_type_is_array(var_type));
++
++ if (glsl_type_is_array(var_type))
++ var_type = glsl_get_array_element(var_type);
++ }
++
++ bool is_vertex_input = (var->data.mode == nir_var_shader_in &&
++ stage == MESA_SHADER_VERTEX);
++ unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input);
++ return ((1ull << slots) - 1) << var->data.location;
++}
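++
++/* A minimal sketch of the intended use (the gathering pass itself is
++ * hypothetical): a pass walking the shader's input variables could do
++ *
++ * shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage);
++ *
++ * to record which input slots the variable covers.
++ */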
++
+ typedef struct nir_register {
+ struct exec_node node;
+
+ unsigned num_components; /** < number of vector components */
+ unsigned num_array_elems; /** < size of array (0 for no array) */
+
+ /** generic register index. */
+ unsigned index;
+
+ /** only for debug purposes, can be NULL */
+ const char *name;
+
+ /** whether this register is local (per-function) or global (per-shader) */
+ bool is_global;
+
+ /**
+ * If this flag is set to true, then accessing channels >= num_components
+ * is well-defined, and simply spills over to the next array element. This
+ * is useful for backends that can do per-component accessing, in
+ * particular scalar backends. By setting this flag and making
+ * num_components equal to 1, structures can be packed tightly into
+ * registers and then registers can be accessed per-component to get to
+ * each structure member, even if it crosses vec4 boundaries.
+ */
+ bool is_packed;
+
+ /** set of nir_src's where this register is used (read from) */
+ struct list_head uses;
+
+ /** set of nir_dest's where this register is defined (written to) */
+ struct list_head defs;
+
+ /** set of nir_if's where this register is used as a condition */
+ struct list_head if_uses;
+ } nir_register;
+
+ typedef enum {
+ nir_instr_type_alu,
+ nir_instr_type_call,
+ nir_instr_type_tex,
+ nir_instr_type_intrinsic,
+ nir_instr_type_load_const,
+ nir_instr_type_jump,
+ nir_instr_type_ssa_undef,
+ nir_instr_type_phi,
+ nir_instr_type_parallel_copy,
+ } nir_instr_type;
+
+ typedef struct nir_instr {
+ struct exec_node node;
+ nir_instr_type type;
+ struct nir_block *block;
+
+ /** generic instruction index. */
+ unsigned index;
+
+ /* A temporary for optimization and analysis passes to use for storing
+ * flags. For instance, DCE uses this to store the "dead/live" info.
+ */
+ uint8_t pass_flags;
+ } nir_instr;
+
+ static inline nir_instr *
+ nir_instr_next(nir_instr *instr)
+ {
+ struct exec_node *next = exec_node_get_next(&instr->node);
+ if (exec_node_is_tail_sentinel(next))
+ return NULL;
+ else
+ return exec_node_data(nir_instr, next, node);
+ }
+
+ static inline nir_instr *
+ nir_instr_prev(nir_instr *instr)
+ {
+ struct exec_node *prev = exec_node_get_prev(&instr->node);
+ if (exec_node_is_head_sentinel(prev))
+ return NULL;
+ else
+ return exec_node_data(nir_instr, prev, node);
+ }
+
+ static inline bool
+ nir_instr_is_first(nir_instr *instr)
+ {
+ return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node));
+ }
+
+ static inline bool
+ nir_instr_is_last(nir_instr *instr)
+ {
+ return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node));
+ }
+
+ typedef struct nir_ssa_def {
+ /** for debugging only, can be NULL */
+ const char* name;
+
+ /** generic SSA definition index. */
+ unsigned index;
+
+ /** Index into the live_in and live_out bitfields */
+ unsigned live_index;
+
+ nir_instr *parent_instr;
+
+ /** set of nir_instr's where this register is used (read from) */
+ struct list_head uses;
+
+ /** set of nir_if's where this register is used as a condition */
+ struct list_head if_uses;
+
+ uint8_t num_components;
+ } nir_ssa_def;
+
+ struct nir_src;
+
+ typedef struct {
+ nir_register *reg;
+ struct nir_src *indirect; /** < NULL for no indirect offset */
+ unsigned base_offset;
+
+ /* TODO use-def chain goes here */
+ } nir_reg_src;
+
+ typedef struct {
+ nir_instr *parent_instr;
+ struct list_head def_link;
+
+ nir_register *reg;
+ struct nir_src *indirect; /** < NULL for no indirect offset */
+ unsigned base_offset;
+
+ /* TODO def-use chain goes here */
+ } nir_reg_dest;
+
+ struct nir_if;
+
+ typedef struct nir_src {
+ union {
+ nir_instr *parent_instr;
+ struct nir_if *parent_if;
+ };
+
+ struct list_head use_link;
+
+ union {
+ nir_reg_src reg;
+ nir_ssa_def *ssa;
+ };
+
+ bool is_ssa;
+ } nir_src;
+
-#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
++#ifdef __cplusplus
++# define NIR_SRC_INIT nir_src()
++#else
++# define NIR_SRC_INIT (nir_src) { { NULL } }
++#endif
+
+ #define nir_foreach_use(reg_or_ssa_def, src) \
+ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
+
+ #define nir_foreach_use_safe(reg_or_ssa_def, src) \
+ list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
+
+ #define nir_foreach_if_use(reg_or_ssa_def, src) \
+ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
+
+ #define nir_foreach_if_use_safe(reg_or_ssa_def, src) \
+ list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
+
+ typedef struct {
+ union {
+ nir_reg_dest reg;
+ nir_ssa_def ssa;
+ };
+
+ bool is_ssa;
+ } nir_dest;
+
- /** The size of the sampler array or 0 if it's not an array */
- unsigned sampler_array_size;
-
- nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */
++#ifdef __cplusplus
++# define NIR_DEST_INIT nir_dest()
++#else
++# define NIR_DEST_INIT (nir_dest) { { { NULL } } }
++#endif
+
+ #define nir_foreach_def(reg, dest) \
+ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
+
+ #define nir_foreach_def_safe(reg, dest) \
+ list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)
+
+ static inline nir_src
+ nir_src_for_ssa(nir_ssa_def *def)
+ {
+ nir_src src = NIR_SRC_INIT;
+
+ src.is_ssa = true;
+ src.ssa = def;
+
+ return src;
+ }
+
+ static inline nir_src
+ nir_src_for_reg(nir_register *reg)
+ {
+ nir_src src = NIR_SRC_INIT;
+
+ src.is_ssa = false;
+ src.reg.reg = reg;
+ src.reg.indirect = NULL;
+ src.reg.base_offset = 0;
+
+ return src;
+ }
+
+ static inline nir_dest
+ nir_dest_for_reg(nir_register *reg)
+ {
+ nir_dest dest = NIR_DEST_INIT;
+
+ dest.reg.reg = reg;
+
+ return dest;
+ }
+
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
+
+ typedef struct {
+ nir_src src;
+
+ /**
+ * \name input modifiers
+ */
+ /*@{*/
+ /**
+ * For inputs interpreted as floating point, flips the sign bit. For
+ * inputs interpreted as integers, performs the two's complement negation.
+ */
+ bool negate;
+
+ /**
+ * Clears the sign bit for floating point values, and computes the integer
+ * absolute value for integers. Note that the negate modifier acts after
+ * the absolute value modifier, therefore if both are set then all inputs
+ * will become negative.
+ */
+ bool abs;
+ /*@}*/
+
+ /**
+ * For each input component, says which component of the register it is
+ * chosen from. Note that which elements of the swizzle are used and which
+ * are ignored are based on the write mask for most opcodes - for example,
+ * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
+ * a swizzle of {2, x, 1, 0} where x means "don't care."
+ */
+ uint8_t swizzle[4];
+ } nir_alu_src;
+
+ typedef struct {
+ nir_dest dest;
+
+ /**
+ * \name saturate output modifier
+ *
+ * Only valid for opcodes that output floating-point numbers. Clamps the
+ * output to between 0.0 and 1.0 inclusive.
+ */
+
+ bool saturate;
+
+ unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+ } nir_alu_dest;
+
+ typedef enum {
+ nir_type_invalid = 0, /* Not a valid type */
+ nir_type_float,
+ nir_type_int,
+ nir_type_uint,
+ nir_type_bool
+ } nir_alu_type;
+
+ typedef enum {
+ NIR_OP_IS_COMMUTATIVE = (1 << 0),
+ NIR_OP_IS_ASSOCIATIVE = (1 << 1),
+ } nir_op_algebraic_property;
+
+ typedef struct {
+ const char *name;
+
+ unsigned num_inputs;
+
+ /**
+ * The number of components in the output
+ *
+ * If non-zero, this is the size of the output and input sizes are
+ * explicitly given; swizzle and writemask are still in effect, but if
+ * the output component is masked out, then the input component may
+ * still be in use.
+ *
+ * If zero, the opcode acts in the standard, per-component manner; the
+ * operation is performed on each component (except the ones that are
+ * masked out) with the input being taken from the input swizzle for
+ * that component.
+ *
+ * The size of some of the inputs may be given (i.e. non-zero) even
+ * though output_size is zero; in that case, the inputs with a zero
+ * size act per-component, while the inputs with non-zero size don't.
+ */
+ unsigned output_size;
+
+ /**
+ * The type of vector that the instruction outputs. Note that the
+ * saturate modifier is only allowed on outputs with the float type.
+ */
+
+ nir_alu_type output_type;
+
+ /**
+ * The number of components in each input
+ */
+ unsigned input_sizes[4];
+
+ /**
+ * The type of vector that each input takes. Note that negate and
+ * absolute value are only allowed on inputs with int or float type and
+ * behave differently on the two.
+ */
+ nir_alu_type input_types[4];
+
+ nir_op_algebraic_property algebraic_properties;
+ } nir_op_info;
+
+ extern const nir_op_info nir_op_infos[nir_num_opcodes];
+
+ typedef struct nir_alu_instr {
+ nir_instr instr;
+ nir_op op;
+ nir_alu_dest dest;
+ nir_alu_src src[];
+ } nir_alu_instr;
+
+ void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr);
+ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr);
+
+ /* is this source channel used? */
+ static inline bool
+ nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
+ {
+ if (nir_op_infos[instr->op].input_sizes[src] > 0)
+ return channel < nir_op_infos[instr->op].input_sizes[src];
+
+ return (instr->dest.write_mask >> channel) & 1;
+ }
+
+ /*
+ * For instructions whose destinations are SSA, get the number of channels
+ * used for a source
+ */
+ static inline unsigned
+ nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
+ {
+ assert(instr->dest.dest.is_ssa);
+
+ if (nir_op_infos[instr->op].input_sizes[src] > 0)
+ return nir_op_infos[instr->op].input_sizes[src];
+
+ return instr->dest.dest.ssa.num_components;
+ }
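+
+ /* A typical per-channel walk over ALU source `s` of an SSA-destination ALU
+ * instruction `alu` (both names are placeholders) looks like this sketch:
+ *
+ * for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu, s); i++) {
+ * if (!nir_alu_instr_channel_used(alu, s, i))
+ * continue;
+ * // channel i reads component alu->src[s].swizzle[i] of the source value
+ * }
+ */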
+
+ typedef enum {
+ nir_deref_type_var,
+ nir_deref_type_array,
+ nir_deref_type_struct
+ } nir_deref_type;
+
+ typedef struct nir_deref {
+ nir_deref_type deref_type;
+ struct nir_deref *child;
+ const struct glsl_type *type;
+ } nir_deref;
+
+ typedef struct {
+ nir_deref deref;
+
+ nir_variable *var;
+ } nir_deref_var;
+
+ /* This enum describes how the array is referenced. If the deref is
+ * direct then the base_offset is used. If the deref is indirect then the
+ * offset is given by base_offset + indirect. If the deref is a wildcard
+ * then the deref refers to all of the elements of the array at the same
+ * time. Wildcard dereferences are only ever allowed in copy_var
+ * intrinsics and the source and destination derefs must have matching
+ * wildcards.
+ */
+ typedef enum {
+ nir_deref_array_type_direct,
+ nir_deref_array_type_indirect,
+ nir_deref_array_type_wildcard,
+ } nir_deref_array_type;
+
+ typedef struct {
+ nir_deref deref;
+
+ nir_deref_array_type deref_array_type;
+ unsigned base_offset;
+ nir_src indirect;
+ } nir_deref_array;
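+
+ /* For illustration (the variable names are hypothetical): a direct deref of
+ * a[3] would use deref_array_type = nir_deref_array_type_direct with
+ * base_offset = 3, while a[i + 2] would use nir_deref_array_type_indirect
+ * with base_offset = 2 and `indirect` pointing at the SSA value of i.
+ */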
+
+ typedef struct {
+ nir_deref deref;
+
+ unsigned index;
+ } nir_deref_struct;
+
+ NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref)
+ NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
+ NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
+
+ /* Returns the last deref in the chain. */
+ static inline nir_deref *
+ nir_deref_tail(nir_deref *deref)
+ {
+ while (deref->child)
+ deref = deref->child;
+ return deref;
+ }
+
+ typedef struct {
+ nir_instr instr;
+
+ unsigned num_params;
+ nir_deref_var **params;
+ nir_deref_var *return_deref;
+
+ struct nir_function *callee;
+ } nir_call_instr;
+
+ #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
+ num_variables, num_indices, flags) \
+ nir_intrinsic_##name,
+
+ #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
+
+ typedef enum {
+ #include "nir_intrinsics.h"
+ nir_num_intrinsics = nir_last_intrinsic + 1
+ } nir_intrinsic_op;
+
+ #undef INTRINSIC
+ #undef LAST_INTRINSIC
+
+ /** Represents an intrinsic
+ *
+ * An intrinsic is an instruction type for handling things that are
+ * more-or-less regular operations but don't just consume and produce SSA
+ * values like ALU operations do. Intrinsics are not for things that have
+ * special semantic meaning such as phi nodes and parallel copies.
+ * Examples of intrinsics include variable load/store operations, system
+ * value loads, and the like. Even though texturing more-or-less falls
+ * under this category, texturing is its own instruction type because
+ * trying to represent texturing with intrinsics would lead to a
+ * combinatorial explosion of intrinsic opcodes.
+ *
+ * By having a single instruction type for handling a lot of different
+ * cases, optimization passes can look for intrinsics and, for the most
+ * part, completely ignore them. Each intrinsic type also has a few
+ * possible flags that govern whether or not they can be reordered or
+ * eliminated. That way passes like dead code elimination can still work
+ * on intrinsics without understanding the meaning of each.
+ *
+ * Each intrinsic has some number of constant indices, some number of
+ * variables, and some number of sources. What these sources, variables,
+ * and indices mean depends on the intrinsic and is documented with the
+ * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
+ * instructions are the only types of instruction that can operate on
+ * variables.
+ */
+ typedef struct {
+ nir_instr instr;
+
+ nir_intrinsic_op intrinsic;
+
+ nir_dest dest;
+
+ /** number of components if this is a vectorized intrinsic
+ *
+ * Similarly to ALU operations, some intrinsics are vectorized.
+ * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
+ * For vectorized intrinsics, the num_components field specifies the
+ * number of destination components and the number of source components
+ * for all sources with nir_intrinsic_infos.src_components[i] == 0.
+ */
+ uint8_t num_components;
+
+ int const_index[3];
+
+ nir_deref_var *variables[2];
+
+ nir_src src[];
+ } nir_intrinsic_instr;
+
+ /**
+ * \name NIR intrinsics semantic flags
+ *
+ * information about what the compiler can do with the intrinsics.
+ *
+ * \sa nir_intrinsic_info::flags
+ */
+ typedef enum {
+ /**
+ * Whether the intrinsic can be safely eliminated if none of its output
+ * values are being used.
+ */
+ NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
+
+ /**
+ * Whether the intrinsic can be reordered with respect to any other
+ * intrinsic, i.e. whether the only reordering dependencies of the
+ * intrinsic are due to the register reads/writes.
+ */
+ NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+ } nir_intrinsic_semantic_flag;
+
+ #define NIR_INTRINSIC_MAX_INPUTS 4
+
+ typedef struct {
+ const char *name;
+
+ unsigned num_srcs; /** < number of register/SSA inputs */
+
+ /** number of components of each input register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
+ unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
+
+ bool has_dest;
+
+ /** number of components of the output register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
+ unsigned dest_components;
+
+ /** the number of inputs/outputs that are variables */
+ unsigned num_variables;
+
+ /** the number of constant indices used by the intrinsic */
+ unsigned num_indices;
+
+ /** semantic flags for calls to this intrinsic */
+ nir_intrinsic_semantic_flag flags;
+ } nir_intrinsic_info;
+
+ extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
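+
+ /* A sketch of how a pass would resolve the destination width of an
+ * intrinsic `intrin` (a placeholder name), following the "0 means use
+ * num_components" convention described above:
+ *
+ * const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+ * unsigned dest_comps = info->dest_components ? info->dest_components
+ * : intrin->num_components;
+ */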
+
+ /**
+ * \group texture information
+ *
+ * This gives semantic information about textures which is useful to the
+ * frontend, the backend, and lowering passes, but not the optimizer.
+ */
+
+ typedef enum {
+ nir_tex_src_coord,
+ nir_tex_src_projector,
+ nir_tex_src_comparitor, /* shadow comparator */
+ nir_tex_src_offset,
+ nir_tex_src_bias,
+ nir_tex_src_lod,
+ nir_tex_src_ms_index, /* MSAA sample index */
+ nir_tex_src_ddx,
+ nir_tex_src_ddy,
++ nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
+ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
+ nir_num_tex_src_types
+ } nir_tex_src_type;
+
+ typedef struct {
+ nir_src src;
+ nir_tex_src_type src_type;
+ } nir_tex_src;
+
+ typedef enum {
+ nir_texop_tex, /**< Regular texture look-up */
+ nir_texop_txb, /**< Texture look-up with LOD bias */
+ nir_texop_txl, /**< Texture look-up with explicit LOD */
+ nir_texop_txd, /**< Texture look-up with partial derivatives */
+ nir_texop_txf, /**< Texel fetch with explicit LOD */
+ nir_texop_txf_ms, /**< Multisample texture fetch */
+ nir_texop_txs, /**< Texture size */
+ nir_texop_lod, /**< Texture lod query */
+ nir_texop_tg4, /**< Texture gather */
+ nir_texop_query_levels, /**< Texture levels query */
+ nir_texop_texture_samples, /**< Texture samples query */
+ nir_texop_samples_identical, /**< Query whether all samples are definitely
+ * identical.
+ */
+ } nir_texop;
+
+ typedef struct {
+ nir_instr instr;
+
+ enum glsl_sampler_dim sampler_dim;
+ nir_alu_type dest_type;
+
+ nir_texop op;
+ nir_dest dest;
+ nir_tex_src *src;
+ unsigned num_srcs, coord_components;
+ bool is_array, is_shadow;
+
+ /**
+ * If is_shadow is true, whether this is the old-style shadow that outputs 4
+ * components or the new-style shadow that outputs 1 component.
+ */
+ bool is_new_style_shadow;
+
+ /* constant offset - must be 0 if the offset source is used */
+ int const_offset[4];
+
+ /* gather component selector */
+ unsigned component : 2;
+
++ /** The texture index
++ *
++ * If this texture instruction has a nir_tex_src_texture_offset source,
++ * then the texture index is given by texture_index + texture_offset.
++ */
++ unsigned texture_index;
++
++ /** The size of the texture array or 0 if it's not an array */
++ unsigned texture_array_size;
++
++ /** The texture deref
++ *
++ * If this and `sampler` are both NULL, use texture_index instead.
++ * If `texture` is NULL, but `sampler` is non-NULL, then the texture is
++ * implied from the sampler.
++ */
++ nir_deref_var *texture;
++
+ /** The sampler index
+ *
+ * If this texture instruction has a nir_tex_src_sampler_offset source,
+ * then the sampler index is given by sampler_index + sampler_offset.
+ */
+ unsigned sampler_index;
+
- unsigned num_inputs, num_uniforms, num_outputs;
++ /** The sampler deref
++ *
++ * If this is null, use sampler_index instead.
++ */
++ nir_deref_var *sampler;
+ } nir_tex_instr;
+
+ static inline unsigned
+ nir_tex_instr_dest_size(nir_tex_instr *instr)
+ {
+ switch (instr->op) {
+ case nir_texop_txs: {
+ unsigned ret;
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_BUF:
+ ret = 1;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ case GLSL_SAMPLER_DIM_MS:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ ret = 2;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ ret = 3;
+ break;
+ default:
+ unreachable("not reached");
+ }
+ if (instr->is_array)
+ ret++;
+ return ret;
+ }
+
+ case nir_texop_lod:
+ return 2;
+
+ case nir_texop_texture_samples:
+ case nir_texop_query_levels:
+ case nir_texop_samples_identical:
+ return 1;
+
+ default:
+ if (instr->is_shadow && instr->is_new_style_shadow)
+ return 1;
+
+ return 4;
+ }
+ }
+
+ /* Returns true if this texture operation queries something about the texture
+ * rather than actually sampling it.
+ */
+ static inline bool
+ nir_tex_instr_is_query(nir_tex_instr *instr)
+ {
+ switch (instr->op) {
+ case nir_texop_txs:
+ case nir_texop_lod:
+ case nir_texop_texture_samples:
+ case nir_texop_query_levels:
+ return true;
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ case nir_texop_txd:
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_tg4:
+ return false;
+ default:
+ unreachable("Invalid texture opcode");
+ }
+ }
+
+ static inline unsigned
+ nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
+ {
+ if (instr->src[src].src_type == nir_tex_src_coord)
+ return instr->coord_components;
+
+ if (instr->src[src].src_type == nir_tex_src_offset ||
+ instr->src[src].src_type == nir_tex_src_ddx ||
+ instr->src[src].src_type == nir_tex_src_ddy) {
+ if (instr->is_array)
+ return instr->coord_components - 1;
+ else
+ return instr->coord_components;
+ }
+
+ return 1;
+ }
+
+ static inline int
+ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
+ {
+ for (unsigned i = 0; i < instr->num_srcs; i++)
+ if (instr->src[i].src_type == type)
+ return (int) i;
+
+ return -1;
+ }
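+
+ /* For example (a sketch; `tex` is a placeholder nir_tex_instr pointer), a
+ * lowering pass can look up the explicit LOD source with:
+ *
+ * int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+ * if (lod_idx >= 0) {
+ * // tex->src[lod_idx].src holds the LOD value
+ * }
+ */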
+
+ typedef struct {
+ union {
+ float f[4];
+ int32_t i[4];
+ uint32_t u[4];
+ };
+ } nir_const_value;
+
+ typedef struct {
+ nir_instr instr;
+
+ nir_const_value value;
+
+ nir_ssa_def def;
+ } nir_load_const_instr;
+
+ typedef enum {
+ nir_jump_return,
+ nir_jump_break,
+ nir_jump_continue,
+ } nir_jump_type;
+
+ typedef struct {
+ nir_instr instr;
+ nir_jump_type type;
+ } nir_jump_instr;
+
+ /* creates a new SSA variable in an undefined state */
+
+ typedef struct {
+ nir_instr instr;
+ nir_ssa_def def;
+ } nir_ssa_undef_instr;
+
+ typedef struct {
+ struct exec_node node;
+
+ /* The predecessor block corresponding to this source */
+ struct nir_block *pred;
+
+ nir_src src;
+ } nir_phi_src;
+
+ #define nir_foreach_phi_src(phi, entry) \
+ foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs)
+ #define nir_foreach_phi_src_safe(phi, entry) \
+ foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs)
+
+ typedef struct {
+ nir_instr instr;
+
+ struct exec_list srcs; /** < list of nir_phi_src */
+
+ nir_dest dest;
+ } nir_phi_instr;
+
+ typedef struct {
+ struct exec_node node;
+ nir_src src;
+ nir_dest dest;
+ } nir_parallel_copy_entry;
+
+ #define nir_foreach_parallel_copy_entry(pcopy, entry) \
+ foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
+
+ typedef struct {
+ nir_instr instr;
+
+ /* A list of nir_parallel_copy_entry's. The sources of all of the
+ * entries are copied to the corresponding destinations "in parallel".
+ * In other words, if we have two entries: a -> b and b -> a, the values
+ * get swapped.
+ */
+ struct exec_list entries;
+ } nir_parallel_copy_instr;
+
+ NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr)
+ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
+ nir_parallel_copy_instr, instr)
+
+ /*
+ * Control flow
+ *
+ * Control flow consists of a tree of control flow nodes, which include
+ * if-statements and loops. The leaves of the tree are basic blocks, lists of
+ * instructions that always run start-to-finish. Each basic block also keeps
+ * track of its successors (blocks which may run immediately after the current
+ * block) and predecessors (blocks which could have run immediately before the
+ * current block). Each function also has a start block and an end block which
+ * all return statements point to (which is always empty). Together, all the
+ * blocks with their predecessors and successors make up the control flow
+ * graph (CFG) of the function. There are helpers that modify the tree of
+ * control flow nodes while modifying the CFG appropriately; these should be
+ * used instead of modifying the tree directly.
+ */
+
+ typedef enum {
+ nir_cf_node_block,
+ nir_cf_node_if,
+ nir_cf_node_loop,
+ nir_cf_node_function
+ } nir_cf_node_type;
+
+ typedef struct nir_cf_node {
+ struct exec_node node;
+ nir_cf_node_type type;
+ struct nir_cf_node *parent;
+ } nir_cf_node;
+
+ typedef struct nir_block {
+ nir_cf_node cf_node;
+
+ struct exec_list instr_list; /** < list of nir_instr */
+
+ /** generic block index; generated by nir_index_blocks */
+ unsigned index;
+
+ /*
+ * Each block can only have up to 2 successors, so we put them in a simple
+ * array - no need for anything more complicated.
+ */
+ struct nir_block *successors[2];
+
+ /* Set of nir_block predecessors in the CFG */
+ struct set *predecessors;
+
+ /*
+ * this node's immediate dominator in the dominance tree - set to NULL for
+ * the start block.
+ */
+ struct nir_block *imm_dom;
+
+ /* This node's children in the dominance tree */
+ unsigned num_dom_children;
+ struct nir_block **dom_children;
+
+ /* Set of nir_block's on the dominance frontier of this block */
+ struct set *dom_frontier;
+
+ /*
+ * These two indices have the property that dom_{pre,post}_index for each
+ * child of this block in the dominance tree will always be between
+ * dom_pre_index and dom_post_index for this block, which makes testing if
+ * a given block is dominated by another block an O(1) operation.
+ */
+ unsigned dom_pre_index, dom_post_index;
+
+ /* live in and out for this block; used for liveness analysis */
+ BITSET_WORD *live_in;
+ BITSET_WORD *live_out;
+ } nir_block;
+
+ static inline nir_instr *
+ nir_block_first_instr(nir_block *block)
+ {
+ struct exec_node *head = exec_list_get_head(&block->instr_list);
+ return exec_node_data(nir_instr, head, node);
+ }
+
+ static inline nir_instr *
+ nir_block_last_instr(nir_block *block)
+ {
+ struct exec_node *tail = exec_list_get_tail(&block->instr_list);
+ return exec_node_data(nir_instr, tail, node);
+ }
+
+ #define nir_foreach_instr(block, instr) \
+ foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
+ #define nir_foreach_instr_reverse(block, instr) \
+ foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
+ #define nir_foreach_instr_safe(block, instr) \
+ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+ #define nir_foreach_instr_reverse_safe(block, instr) \
+ foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
+
+ typedef struct nir_if {
+ nir_cf_node cf_node;
+ nir_src condition;
+
+ struct exec_list then_list; /** < list of nir_cf_node */
+ struct exec_list else_list; /** < list of nir_cf_node */
+ } nir_if;
+
+ static inline nir_cf_node *
+ nir_if_first_then_node(nir_if *if_stmt)
+ {
+ struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
+ return exec_node_data(nir_cf_node, head, node);
+ }
+
+ static inline nir_cf_node *
+ nir_if_last_then_node(nir_if *if_stmt)
+ {
+ struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
+ return exec_node_data(nir_cf_node, tail, node);
+ }
+
+ static inline nir_cf_node *
+ nir_if_first_else_node(nir_if *if_stmt)
+ {
+ struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
+ return exec_node_data(nir_cf_node, head, node);
+ }
+
+ static inline nir_cf_node *
+ nir_if_last_else_node(nir_if *if_stmt)
+ {
+ struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
+ return exec_node_data(nir_cf_node, tail, node);
+ }
+
+ typedef struct {
+ nir_cf_node cf_node;
+
+ struct exec_list body; /** < list of nir_cf_node */
+ } nir_loop;
+
+ static inline nir_cf_node *
+ nir_loop_first_cf_node(nir_loop *loop)
+ {
+ return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node);
+ }
+
+ static inline nir_cf_node *
+ nir_loop_last_cf_node(nir_loop *loop)
+ {
+ return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node);
+ }
+
+ /**
+ * Various bits of metadata that may be created or required by
+ * optimization and analysis passes
+ */
+ typedef enum {
+ nir_metadata_none = 0x0,
+ nir_metadata_block_index = 0x1,
+ nir_metadata_dominance = 0x2,
+ nir_metadata_live_ssa_defs = 0x4,
+ nir_metadata_not_properly_reset = 0x8,
+ } nir_metadata;
+
+ typedef struct {
+ nir_cf_node cf_node;
+
+ /** pointer to the function of which this is an implementation */
+ struct nir_function *function;
+
+ struct exec_list body; /** < list of nir_cf_node */
+
+ nir_block *end_block;
+
+ /** list for all local variables in the function */
+ struct exec_list locals;
+
+ /** array of variables used as parameters */
+ unsigned num_params;
+ nir_variable **params;
+
+ /** variable used to hold the result of the function */
+ nir_variable *return_var;
+
+ /** list of local registers in the function */
+ struct exec_list registers;
+
+ /** next available local register index */
+ unsigned reg_alloc;
+
+ /** next available SSA value index */
+ unsigned ssa_alloc;
+
+ /* total number of basic blocks, only valid when the block_index metadata is valid */
+ unsigned num_blocks;
+
+ nir_metadata valid_metadata;
+ } nir_function_impl;
+
+ static inline nir_block *
+ nir_start_block(nir_function_impl *impl)
+ {
+ return (nir_block *) exec_list_get_head(&impl->body);
+ }
+
+ static inline nir_cf_node *
+ nir_cf_node_next(nir_cf_node *node)
+ {
+ struct exec_node *next = exec_node_get_next(&node->node);
+ if (exec_node_is_tail_sentinel(next))
+ return NULL;
+ else
+ return exec_node_data(nir_cf_node, next, node);
+ }
+
+ static inline nir_cf_node *
+ nir_cf_node_prev(nir_cf_node *node)
+ {
+ struct exec_node *prev = exec_node_get_prev(&node->node);
+ if (exec_node_is_head_sentinel(prev))
+ return NULL;
+ else
+ return exec_node_data(nir_cf_node, prev, node);
+ }
+
+ static inline bool
+ nir_cf_node_is_first(const nir_cf_node *node)
+ {
+ return exec_node_is_head_sentinel(node->node.prev);
+ }
+
+ static inline bool
+ nir_cf_node_is_last(const nir_cf_node *node)
+ {
+ return exec_node_is_tail_sentinel(node->node.next);
+ }
+
+ NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node)
+ NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node)
+ NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node)
+ NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node)
+
+ typedef enum {
+ nir_parameter_in,
+ nir_parameter_out,
+ nir_parameter_inout,
+ } nir_parameter_type;
+
+ typedef struct {
+ nir_parameter_type param_type;
+ const struct glsl_type *type;
+ } nir_parameter;
+
+ typedef struct nir_function {
+ struct exec_node node;
+
+ const char *name;
+ struct nir_shader *shader;
+
+ unsigned num_params;
+ nir_parameter *params;
+ const struct glsl_type *return_type;
+
+ /** The implementation of this function.
+ *
+ * If the function is only declared and not implemented, this is NULL.
+ */
+ nir_function_impl *impl;
+ } nir_function;
+
+ typedef struct nir_shader_compiler_options {
+ bool lower_fdiv;
+ bool lower_ffma;
+ bool lower_flrp;
+ bool lower_fpow;
+ bool lower_fsat;
+ bool lower_fsqrt;
+ bool lower_fmod;
+ bool lower_bitfield_extract;
+ bool lower_bitfield_insert;
+ bool lower_uadd_carry;
+ bool lower_usub_borrow;
+ /** lowers fneg and ineg to fsub and isub. */
+ bool lower_negate;
+ /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+ bool lower_sub;
+
+ /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+ bool lower_scmp;
+
+ /* Does the native fdot instruction replicate its result for four
+ * components? If so, then opt_algebraic_late will turn all fdotN
+ * instructions into fdot_replicatedN instructions.
+ */
+ bool fdot_replicates;
+
+ /** lowers ffract to fsub+ffloor: */
+ bool lower_ffract;
+
++ bool lower_pack_half_2x16;
++ bool lower_pack_unorm_2x16;
++ bool lower_pack_snorm_2x16;
++ bool lower_pack_unorm_4x8;
++ bool lower_pack_snorm_4x8;
++ bool lower_unpack_half_2x16;
++ bool lower_unpack_unorm_2x16;
++ bool lower_unpack_snorm_2x16;
++ bool lower_unpack_unorm_4x8;
++ bool lower_unpack_snorm_4x8;
++
++ bool lower_extract_byte;
++ bool lower_extract_word;
++
+ /**
+ * Does the driver support real 32-bit integers? (Otherwise, integers
+ * are simulated by floats.)
+ */
+ bool native_integers;
++
++ /* Indicates that the driver only has zero-based vertex id */
++ bool vertex_id_zero_based;
+ } nir_shader_compiler_options;
+
+ typedef struct nir_shader_info {
+ const char *name;
+
+ /* Descriptive name provided by the client; may be NULL */
+ const char *label;
+
+ /* Number of textures used by this shader */
+ unsigned num_textures;
+ /* Number of uniform buffers used by this shader */
+ unsigned num_ubos;
+ /* Number of atomic buffers used by this shader */
+ unsigned num_abos;
+ /* Number of shader storage buffers used by this shader */
+ unsigned num_ssbos;
+ /* Number of images used by this shader */
+ unsigned num_images;
+
+ /* Which inputs are actually read */
+ uint64_t inputs_read;
+ /* Which outputs are actually written */
+ uint64_t outputs_written;
+ /* Which system values are actually read */
+ uint64_t system_values_read;
+
+ /* Which patch inputs are actually read */
+ uint32_t patch_inputs_read;
+ /* Which patch outputs are actually written */
+ uint32_t patch_outputs_written;
+
+ /* Whether or not this shader ever uses textureGather() */
+ bool uses_texture_gather;
+
+ /* Whether or not this shader uses the gl_ClipDistance output */
+ bool uses_clip_distance_out;
+
+ /* Whether or not separate shader objects were used */
+ bool separate_shader;
+
+ /** Was this shader linked with any transform feedback varyings? */
+ bool has_transform_feedback_varyings;
+
+ union {
+ struct {
+ /** The number of vertices received per input primitive */
+ unsigned vertices_in;
+
+ /** The output primitive type (GL enum value) */
+ unsigned output_primitive;
+
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+
+ /** Whether or not this shader uses EndPrimitive */
+ bool uses_end_primitive;
+
+ /** Whether or not this shader uses non-zero streams */
+ bool uses_streams;
+ } gs;
+
+ struct {
+ bool uses_discard;
+
+ /**
+ * Whether early fragment tests are enabled as defined by
+ * ARB_shader_image_load_store.
+ */
+ bool early_fragment_tests;
+
+ /** gl_FragDepth layout for ARB_conservative_depth. */
+ enum gl_frag_depth_layout depth_layout;
+ } fs;
+
+ struct {
+ unsigned local_size[3];
+ } cs;
+
+ struct {
+ /** The number of vertices in the TCS output patch. */
+ unsigned vertices_out;
+ } tcs;
+ };
+ } nir_shader_info;
+
+ typedef struct nir_shader {
+ /** list of uniforms (nir_variable) */
+ struct exec_list uniforms;
+
+ /** list of inputs (nir_variable) */
+ struct exec_list inputs;
+
+ /** list of outputs (nir_variable) */
+ struct exec_list outputs;
+
++ /** list of shared compute variables (nir_variable) */
++ struct exec_list shared;
++
+ /** Set of driver-specific options for the shader.
+ *
+ * The memory for the options is expected to be kept in a single static
+ * copy by the driver.
+ */
+ const struct nir_shader_compiler_options *options;
+
+ /** Various bits of compile-time information about a given shader */
+ struct nir_shader_info info;
+
+ /** list of global variables in the shader (nir_variable) */
+ struct exec_list globals;
+
+ /** list of system value variables in the shader (nir_variable) */
+ struct exec_list system_values;
+
+ struct exec_list functions; /** < list of nir_function */
+
+ /** list of global registers in the shader */
+ struct exec_list registers;
+
+ /** next available global register index */
+ unsigned reg_alloc;
+
+ /**
+ * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
+ * access plus one
+ */
-nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
++ unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+
+ /** The shader stage, such as MESA_SHADER_VERTEX. */
+ gl_shader_stage stage;
+ } nir_shader;
+
+ #define nir_foreach_function(shader, func) \
+ foreach_list_typed(nir_function, func, node, &(shader)->functions)
+
+ nir_shader *nir_shader_create(void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options);
+
+ /** creates a register, including assigning it an index and adding it to the list */
+ nir_register *nir_global_reg_create(nir_shader *shader);
+
+ nir_register *nir_local_reg_create(nir_function_impl *impl);
+
+ void nir_reg_remove(nir_register *reg);
+
+ /** Adds a variable to the appropriate list in nir_shader */
+ void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
+
+ static inline void
+ nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
+ {
+ assert(var->data.mode == nir_var_local);
+ exec_list_push_tail(&impl->locals, &var->node);
+ }
+
+ /** creates a variable, sets a few defaults, and adds it to the list */
+ nir_variable *nir_variable_create(nir_shader *shader,
+ nir_variable_mode mode,
+ const struct glsl_type *type,
+ const char *name);
+ /** creates a local variable and adds it to the list */
+ nir_variable *nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type,
+ const char *name);
+
+ /** creates a function and adds it to the shader's list of functions */
+ nir_function *nir_function_create(nir_shader *shader, const char *name);
+
+ nir_function_impl *nir_function_impl_create(nir_function *func);
++/** creates a function_impl that isn't tied to any particular function */
++nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
+
+ nir_block *nir_block_create(nir_shader *shader);
+ nir_if *nir_if_create(nir_shader *shader);
+ nir_loop *nir_loop_create(nir_shader *shader);
+
+ nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
+
+ /** requests that the given pieces of metadata be generated */
+ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
+ /** dirties all but the preserved metadata */
+ void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+
+ /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
+ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
+
+ nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
+
+ nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
+ unsigned num_components);
+
+ nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
+ nir_intrinsic_op op);
+
+ nir_call_instr *nir_call_instr_create(nir_shader *shader,
+ nir_function *callee);
+
+ nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
+
+ nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
+
+ nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
+
+ nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
+ unsigned num_components);
+
+ nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
+ nir_deref_array *nir_deref_array_create(void *mem_ctx);
+ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
+
+ nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+
+ nir_load_const_instr *
+ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
+
+ /**
+ * NIR Cursors and Instruction Insertion API
+ * @{
+ *
+ * A tiny struct representing a point to insert/extract instructions or
+ * control flow nodes. Helps reduce the combinatorial explosion of possible
+ * points to insert/extract.
+ *
+ * \sa nir_control_flow.h
+ */
+ typedef enum {
+ nir_cursor_before_block,
+ nir_cursor_after_block,
+ nir_cursor_before_instr,
+ nir_cursor_after_instr,
+ } nir_cursor_option;
+
+ typedef struct {
+ nir_cursor_option option;
+ union {
+ nir_block *block;
+ nir_instr *instr;
+ };
+ } nir_cursor;
+
++static inline nir_block *
++nir_cursor_current_block(nir_cursor cursor)
++{
++ if (cursor.option == nir_cursor_before_instr ||
++ cursor.option == nir_cursor_after_instr) {
++ return cursor.instr->block;
++ } else {
++ return cursor.block;
++ }
++}
++
++bool nir_cursors_equal(nir_cursor a, nir_cursor b);
++
+ static inline nir_cursor
+ nir_before_block(nir_block *block)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_before_block;
+ cursor.block = block;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_after_block(nir_block *block)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_after_block;
+ cursor.block = block;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_before_instr(nir_instr *instr)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_before_instr;
+ cursor.instr = instr;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_after_instr(nir_instr *instr)
+ {
+ nir_cursor cursor;
+ cursor.option = nir_cursor_after_instr;
+ cursor.instr = instr;
+ return cursor;
+ }
+
+ static inline nir_cursor
+ nir_after_block_before_jump(nir_block *block)
+ {
+ nir_instr *last_instr = nir_block_last_instr(block);
+ if (last_instr && last_instr->type == nir_instr_type_jump) {
+ return nir_before_instr(last_instr);
+ } else {
+ return nir_after_block(block);
+ }
+ }
+
+ static inline nir_cursor
+ nir_before_cf_node(nir_cf_node *node)
+ {
+ if (node->type == nir_cf_node_block)
+ return nir_before_block(nir_cf_node_as_block(node));
+
+ return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
+ }
+
+ static inline nir_cursor
+ nir_after_cf_node(nir_cf_node *node)
+ {
+ if (node->type == nir_cf_node_block)
+ return nir_after_block(nir_cf_node_as_block(node));
+
+ return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
+ }
+
++static inline nir_cursor
++nir_after_cf_node_and_phis(nir_cf_node *node)
++{
++ if (node->type == nir_cf_node_block)
++ return nir_after_block(nir_cf_node_as_block(node));
++
++ nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
++ assert(block->cf_node.type == nir_cf_node_block);
++
++ nir_foreach_instr(block, instr) {
++ if (instr->type != nir_instr_type_phi)
++ return nir_before_instr(instr);
++ }
++ return nir_after_block(block);
++}
++
+ static inline nir_cursor
+ nir_before_cf_list(struct exec_list *cf_list)
+ {
+ nir_cf_node *first_node = exec_node_data(nir_cf_node,
+ exec_list_get_head(cf_list), node);
+ return nir_before_cf_node(first_node);
+ }
+
+ static inline nir_cursor
+ nir_after_cf_list(struct exec_list *cf_list)
+ {
+ nir_cf_node *last_node = exec_node_data(nir_cf_node,
+ exec_list_get_tail(cf_list), node);
+ return nir_after_cf_node(last_node);
+ }
+
+ /**
+ * Insert a NIR instruction at the given cursor.
+ *
+ * Note: This does not update the cursor.
+ */
+ void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
+
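+ /* Insertion sketch (illustrative; `instr` and `new_instr` stand for existing
+ * instructions): build a cursor, then insert at it.
+ *
+ *    nir_instr_insert(nir_after_instr(instr), new_instr);
+ *
+ * Since the cursor is not updated, inserting another instruction after
+ * `new_instr` needs a fresh cursor, e.g. nir_after_instr(new_instr).
+ */
+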
+ static inline void
+ nir_instr_insert_before(nir_instr *instr, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_instr(instr), before);
+ }
+
+ static inline void
+ nir_instr_insert_after(nir_instr *instr, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_instr(instr), after);
+ }
+
+ static inline void
+ nir_instr_insert_before_block(nir_block *block, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_block(block), before);
+ }
+
+ static inline void
+ nir_instr_insert_after_block(nir_block *block, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_block(block), after);
+ }
+
+ static inline void
+ nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_cf_node(node), before);
+ }
+
+ static inline void
+ nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_cf_node(node), after);
+ }
+
+ static inline void
+ nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
+ {
+ nir_instr_insert(nir_before_cf_list(list), before);
+ }
+
+ static inline void
+ nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
+ {
+ nir_instr_insert(nir_after_cf_list(list), after);
+ }
+
+ void nir_instr_remove(nir_instr *instr);
+
+ /** @} */
+
+ typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
+ typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
+ typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
+ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
+ void *state);
+ bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
+ bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+
+ nir_const_value *nir_src_as_const_value(nir_src src);
+ bool nir_src_is_dynamically_uniform(nir_src src);
+ bool nir_srcs_equal(nir_src src1, nir_src src2);
+ void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+ void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
+ void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+ nir_dest new_dest);
+
+ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+ unsigned num_components, const char *name);
+ void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+ unsigned num_components, const char *name);
+ void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
+ void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me);
+
+ /* visits basic blocks in source-code order */
+ typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
+ bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state);
+ bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state);
+ bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state);
+
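+ /* Sketch of a block-walk callback (illustrative; count_blocks is a
+ * hypothetical helper, mirroring the callbacks emitted by the generated
+ * algebraic passes). Returning true keeps the walk going:
+ *
+ *    static bool
+ *    count_blocks(nir_block *block, void *state)
+ *    {
+ *       (*(unsigned *)state)++;
+ *       return true;
+ *    }
+ *
+ *    unsigned n = 0;
+ *    nir_foreach_block(impl, count_blocks, &n);
+ */
+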
+ /* If the following CF node is an if, this function returns that if.
+ * Otherwise, it returns NULL.
+ */
+ nir_if *nir_block_get_following_if(nir_block *block);
+
+ nir_loop *nir_block_get_following_loop(nir_block *block);
+
+ void nir_index_local_regs(nir_function_impl *impl);
+ void nir_index_global_regs(nir_shader *shader);
+ void nir_index_ssa_defs(nir_function_impl *impl);
+ unsigned nir_index_instrs(nir_function_impl *impl);
+
+ void nir_index_blocks(nir_function_impl *impl);
+
+ void nir_print_shader(nir_shader *shader, FILE *fp);
+ void nir_print_instr(const nir_instr *instr, FILE *fp);
+
-void nir_lower_outputs_to_temporaries(nir_shader *shader);
++nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
++nir_function_impl *nir_function_impl_clone(const nir_function_impl *impl);
++nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
+
+ #ifdef DEBUG
+ void nir_validate_shader(nir_shader *shader);
+ void nir_metadata_set_validation_flag(nir_shader *shader);
+ void nir_metadata_check_validation_flag(nir_shader *shader);
+
+ #include "util/debug.h"
+ static inline bool
+ should_clone_nir(void)
+ {
+ static int should_clone = -1;
+ if (should_clone < 0)
+ should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
+
+ return should_clone;
+ }
+ #else
+ static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+ static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
+ static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
+ static inline bool should_clone_nir(void) { return false; }
+ #endif /* DEBUG */
+
+ #define _PASS(nir, do_pass) do { \
+ do_pass \
+ nir_validate_shader(nir); \
+ if (should_clone_nir()) { \
+ nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
+ ralloc_free(nir); \
+ nir = clone; \
+ } \
+ } while (0)
+
+ #define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \
+ nir_metadata_set_validation_flag(nir); \
+ if (pass(nir, ##__VA_ARGS__)) { \
+ progress = true; \
+ nir_metadata_check_validation_flag(nir); \
+ } \
+ )
+
+ #define NIR_PASS_V(nir, pass, ...) _PASS(nir, \
+ pass(nir, ##__VA_ARGS__); \
+ )
+
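+ /* Driver-side sketch (the pass selection is illustrative): NIR_PASS
+ * accumulates progress and validates after each pass; with NIR_TEST_CLONE=true
+ * in the environment (debug builds) each pass is also followed by a clone
+ * round-trip. `s` is a hypothetical nir_shader pointer.
+ *
+ *    bool progress = true;
+ *    while (progress) {
+ *       progress = false;
+ *       NIR_PASS(progress, s, nir_copy_prop);
+ *       NIR_PASS(progress, s, nir_opt_dce);
+ *    }
+ *    NIR_PASS_V(s, nir_convert_from_ssa, false);
+ */
+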
+ void nir_calc_dominance_impl(nir_function_impl *impl);
+ void nir_calc_dominance(nir_shader *shader);
+
+ nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+ bool nir_block_dominates(nir_block *parent, nir_block *child);
+
+ void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
+ void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
+
+ void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
+ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
+
+ void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
+ void nir_dump_cfg(nir_shader *shader, FILE *fp);
+
+ int nir_gs_count_vertices(const nir_shader *shader);
+
+ bool nir_split_var_copies(nir_shader *shader);
+
++bool nir_lower_returns_impl(nir_function_impl *impl);
++bool nir_lower_returns(nir_shader *shader);
++
++bool nir_inline_functions(nir_shader *shader);
++
+ void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
+ void nir_lower_var_copies(nir_shader *shader);
+
+ bool nir_lower_global_vars_to_local(nir_shader *shader);
+
++bool nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask);
++
+ bool nir_lower_locals_to_regs(nir_shader *shader);
+
-bool nir_remove_dead_variables(nir_shader *shader);
++void nir_lower_outputs_to_temporaries(nir_shader *shader,
++ nir_function *entrypoint);
++
++void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
+
+ void nir_assign_var_locations(struct exec_list *var_list,
+ unsigned *size,
+ int (*type_size)(const struct glsl_type *));
+
+ void nir_lower_io(nir_shader *shader,
+ nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *));
+ nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
+ nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
+
+ void nir_lower_vars_to_ssa(nir_shader *shader);
+
++bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode);
+
+ void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+ bool nir_lower_vec_to_movs(nir_shader *shader);
+ void nir_lower_alu_to_scalar(nir_shader *shader);
+ void nir_lower_load_const_to_scalar(nir_shader *shader);
+
+ void nir_lower_phis_to_scalar(nir_shader *shader);
+
+ void nir_lower_samplers(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
+
+ bool nir_lower_system_values(nir_shader *shader);
+
+ typedef struct nir_lower_tex_options {
+ /**
+ * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+ * sampler types a texture projector is lowered.
+ */
+ unsigned lower_txp;
+
+ /**
+ * If true, lower rect textures to 2D, using txs to fetch the
+ * texture dimensions and dividing the texture coords by the
+ * texture dims to normalize.
+ */
+ bool lower_rect;
+
+ /**
+ * To emulate certain texture wrap modes, this can be used
+ * to saturate the specified tex coord to [0.0, 1.0]. The
+ * bits are indexed by sampler #, i.e. if, for example:
+ *
+ * (conf->saturate_s & (1 << n))
+ *
+ * is true, then the s coord for sampler n is saturated.
+ *
+ * Note that clamping must happen *after* projector lowering
+ * so any projected texture sample instruction with a clamped
+ * coordinate gets automatically lowered, regardless of the
+ * 'lower_txp' setting.
+ */
+ unsigned saturate_s;
+ unsigned saturate_t;
+ unsigned saturate_r;
+
+ /* Bitmask of samplers that need swizzling.
+ *
+ * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+ * swizzles[sampler_index] is applied to the result of the texturing
+ * operation.
+ */
+ unsigned swizzle_result;
+
+ /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles
+ * while 4 and 5 represent 0 and 1 respectively.
+ */
+ uint8_t swizzles[32][4];
+ } nir_lower_tex_options;
+
+ bool nir_lower_tex(nir_shader *shader,
+ const nir_lower_tex_options *options);
+
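+ /* Configuration sketch (illustrative): lower texture projectors for RECT
+ * samplers and clamp the s/t coordinates of sampler #0. `shader` is assumed
+ * to exist; GLSL_SAMPLER_DIM_RECT is the rectangle-sampler enum value.
+ *
+ *    nir_lower_tex_options opts = {0};
+ *    opts.lower_txp = 1 << GLSL_SAMPLER_DIM_RECT;
+ *    opts.saturate_s = 1 << 0;
+ *    opts.saturate_t = 1 << 0;
+ *    nir_lower_tex(shader, &opts);
+ */
+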
+ void nir_lower_idiv(nir_shader *shader);
+
+ void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
+ void nir_lower_two_sided_color(nir_shader *shader);
+
+ void nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
+ void nir_lower_to_source_mods(nir_shader *shader);
+
+ bool nir_lower_gs_intrinsics(nir_shader *shader);
+
+ bool nir_normalize_cubemap_coords(nir_shader *shader);
+
+ void nir_live_ssa_defs_impl(nir_function_impl *impl);
+ bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
+
+ void nir_convert_to_ssa_impl(nir_function_impl *impl);
+ void nir_convert_to_ssa(nir_shader *shader);
++
++bool nir_repair_ssa_impl(nir_function_impl *impl);
++bool nir_repair_ssa(nir_shader *shader);
+
+ /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
+ * registers. If false, convert all values (even those not involved in a phi
+ * node) to registers.
+ */
+ void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
+
+ bool nir_opt_algebraic(nir_shader *shader);
+ bool nir_opt_algebraic_late(nir_shader *shader);
+ bool nir_opt_constant_folding(nir_shader *shader);
+
+ bool nir_opt_global_to_local(nir_shader *shader);
+
+ bool nir_copy_prop(nir_shader *shader);
+
+ bool nir_opt_cse(nir_shader *shader);
+
+ bool nir_opt_dce(nir_shader *shader);
+
+ bool nir_opt_dead_cf(nir_shader *shader);
+
+ void nir_opt_gcm(nir_shader *shader);
+
+ bool nir_opt_peephole_select(nir_shader *shader);
+
+ bool nir_opt_remove_phis(nir_shader *shader);
+
+ bool nir_opt_undef(nir_shader *shader);
+
+ void nir_sweep(nir_shader *shader);
+
+ nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+ gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
--- /dev/null
- return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+ #! /usr/bin/env python
+ #
+ # Copyright (C) 2014 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ #
+ # Authors:
+ # Jason Ekstrand (jason@jlekstrand.net)
+
+ import itertools
+ import struct
+ import sys
+ import mako.template
+ import re
+
+ # Represents a set of variables, each with a unique id
+ class VarSet(object):
+ def __init__(self):
+ self.names = {}
+ self.ids = itertools.count()
+ self.immutable = False;
+
+ def __getitem__(self, name):
+ if name not in self.names:
+ assert not self.immutable, "Unknown replacement variable: " + name
+ self.names[name] = self.ids.next()
+
+ return self.names[name]
+
+ def lock(self):
+ self.immutable = True
+
+ class Value(object):
+ @staticmethod
+ def create(val, name_base, varset):
+ if isinstance(val, tuple):
+ return Expression(val, name_base, varset)
+ elif isinstance(val, Expression):
+ return val
+ elif isinstance(val, (str, unicode)):
+ return Variable(val, name_base, varset)
+ elif isinstance(val, (bool, int, long, float)):
+ return Constant(val, name_base)
+
+ __template = mako.template.Template("""
+ static const ${val.c_type} ${val.name} = {
+ { ${val.type_enum} },
+ % if isinstance(val, Constant):
+ { ${hex(val)} /* ${val.value} */ },
+ % elif isinstance(val, Variable):
+ ${val.index}, /* ${val.var_name} */
+ ${'true' if val.is_constant else 'false'},
+ nir_type_${ val.required_type or 'invalid' },
+ % elif isinstance(val, Expression):
+ nir_op_${val.opcode},
+ { ${', '.join(src.c_ptr for src in val.sources)} },
+ % endif
+ };""")
+
+ def __init__(self, name, type_str):
+ self.name = name
+ self.type_str = type_str
+
+ @property
+ def type_enum(self):
+ return "nir_search_value_" + self.type_str
+
+ @property
+ def c_type(self):
+ return "nir_search_" + self.type_str
+
+ @property
+ def c_ptr(self):
+ return "&{0}.value".format(self.name)
+
+ def render(self):
+ return self.__template.render(val=self,
+ Constant=Constant,
+ Variable=Variable,
+ Expression=Expression)
+
+ class Constant(Value):
+ def __init__(self, val, name):
+ Value.__init__(self, name, "constant")
+ self.value = val
+
+ def __hex__(self):
+ # Even if it's an integer, we still need to unpack as an unsigned
+ # int. This is because, without C99, we can only assign to the first
+ # element of a union in an initializer.
+ if isinstance(self.value, (bool)):
+ return 'NIR_TRUE' if self.value else 'NIR_FALSE'
+ if isinstance(self.value, (int, long)):
++ return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0])
+ elif isinstance(self.value, float):
+ return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+ else:
+ assert False
+
+ _var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
+
+ class Variable(Value):
+ def __init__(self, val, name, varset):
+ Value.__init__(self, name, "variable")
+
+ m = _var_name_re.match(val)
+ assert m and m.group('name') is not None
+
+ self.var_name = m.group('name')
+ self.is_constant = m.group('const') is not None
+ self.required_type = m.group('type')
+
+ if self.required_type is not None:
+ assert self.required_type in ('float', 'bool', 'int', 'unsigned')
+
+ self.index = varset[self.var_name]
+
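+ # Illustrative transforms, in the form SearchAndReplace below consumes
+ # (a sketch; the '#' and '@' markers follow _var_name_re above):
+ #
+ #    (('fadd', 'a', 0.0), 'a')           # drop an add of zero
+ #    (('ine', 'a@bool', False), 'a')     # '@bool' constrains the variable type
+ #
+ # A '#'-prefixed variable (e.g. '#b') only matches constant operands, and an
+ # optional third tuple element supplies a C condition expression, which ends
+ # up in condition_list and is evaluated against the shader's compiler options.
+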
+ class Expression(Value):
+ def __init__(self, expr, name_base, varset):
+ Value.__init__(self, name_base, "expression")
+ assert isinstance(expr, tuple)
+
+ self.opcode = expr[0]
+ self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
+ for (i, src) in enumerate(expr[1:]) ]
+
+ def render(self):
+ srcs = "\n".join(src.render() for src in self.sources)
+ return srcs + super(Expression, self).render()
+
+ _optimization_ids = itertools.count()
+
+ condition_list = ['true']
+
+ class SearchAndReplace(object):
+ def __init__(self, transform):
+ self.id = _optimization_ids.next()
+
+ search = transform[0]
+ replace = transform[1]
+ if len(transform) > 2:
+ self.condition = transform[2]
+ else:
+ self.condition = 'true'
+
+ if self.condition not in condition_list:
+ condition_list.append(self.condition)
+ self.condition_index = condition_list.index(self.condition)
+
+ varset = VarSet()
+ if isinstance(search, Expression):
+ self.search = search
+ else:
+ self.search = Expression(search, "search{0}".format(self.id), varset)
+
+ varset.lock()
+
+ if isinstance(replace, Value):
+ self.replace = replace
+ else:
+ self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
+
+ _algebraic_pass_template = mako.template.Template("""
+ #include "nir.h"
+ #include "nir_search.h"
+
+ #ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+ #define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
+ struct transform {
+ const nir_search_expression *search;
+ const nir_search_value *replace;
+ unsigned condition_offset;
+ };
+
+ struct opt_state {
+ void *mem_ctx;
+ bool progress;
+ const bool *condition_flags;
+ };
+
+ #endif
+
+ % for (opcode, xform_list) in xform_dict.iteritems():
+ % for xform in xform_list:
+ ${xform.search.render()}
+ ${xform.replace.render()}
+ % endfor
+
+ static const struct transform ${pass_name}_${opcode}_xforms[] = {
+ % for xform in xform_list:
+ { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
+ % endfor
+ };
+ % endfor
+
+ static bool
+ ${pass_name}_block(nir_block *block, void *void_state)
+ {
+ struct opt_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ if (!alu->dest.dest.is_ssa)
+ continue;
+
+ switch (alu->op) {
+ % for opcode in xform_dict.keys():
+ case nir_op_${opcode}:
+ for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
+ const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
+ if (state->condition_flags[xform->condition_offset] &&
+ nir_replace_instr(alu, xform->search, xform->replace,
+ state->mem_ctx)) {
+ state->progress = true;
+ break;
+ }
+ }
+ break;
+ % endfor
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static bool
+ ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
+ {
+ struct opt_state state;
+
+ state.mem_ctx = ralloc_parent(impl);
+ state.progress = false;
+ state.condition_flags = condition_flags;
+
+ nir_foreach_block(impl, ${pass_name}_block, &state);
+
+ if (state.progress)
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return state.progress;
+ }
+
+
+ bool
+ ${pass_name}(nir_shader *shader)
+ {
+ bool progress = false;
+ bool condition_flags[${len(condition_list)}];
+ const nir_shader_compiler_options *options = shader->options;
+
+ % for index, condition in enumerate(condition_list):
+ condition_flags[${index}] = ${condition};
+ % endfor
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ progress |= ${pass_name}_impl(function->impl, condition_flags);
+ }
+
+ return progress;
+ }
+ """)
+
+ class AlgebraicPass(object):
+ def __init__(self, pass_name, transforms):
+ self.xform_dict = {}
+ self.pass_name = pass_name
+
+ for xform in transforms:
+ if not isinstance(xform, SearchAndReplace):
+ xform = SearchAndReplace(xform)
+
+ if xform.search.opcode not in self.xform_dict:
+ self.xform_dict[xform.search.opcode] = []
+
+ self.xform_dict[xform.search.opcode].append(xform)
+
+ def render(self):
+ return _algebraic_pass_template.render(pass_name=self.pass_name,
+ xform_dict=self.xform_dict,
+ condition_list=condition_list)
--- /dev/null
+ /*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #ifndef NIR_BUILDER_H
+ #define NIR_BUILDER_H
+
+ #include "nir_control_flow.h"
+
+ struct exec_list;
+
+ typedef struct nir_builder {
+ nir_cursor cursor;
+
+ nir_shader *shader;
+ nir_function_impl *impl;
+ } nir_builder;
+
+ static inline void
+ nir_builder_init(nir_builder *build, nir_function_impl *impl)
+ {
+ memset(build, 0, sizeof(*build));
+ build->impl = impl;
+ build->shader = impl->function->shader;
+ }
+
+ static inline void
+ nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
+ {
+ build->shader = nir_shader_create(mem_ctx, stage, options);
+ nir_function *func = nir_function_create(build->shader, "main");
+ build->impl = nir_function_impl_create(func);
+ build->cursor = nir_after_cf_list(&build->impl->body);
+ }
+
+ static inline void
+ nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+ {
+ nir_instr_insert(build->cursor, instr);
+
+ /* Move the cursor forward. */
+ build->cursor = nir_after_instr(instr);
+ }
+
+ static inline void
+ nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
+ {
+ nir_cf_node_insert(build->cursor, cf);
+ }
+
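+ /* Builder sketch (illustrative; `mem_ctx` and `options` are assumed to come
+ * from the driver): create a trivial shader and emit an immediate through the
+ * cursor kept in the builder.
+ *
+ *    nir_builder b;
+ *    nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, options);
+ *    nir_ssa_def *half = nir_imm_float(&b, 0.5f);
+ *
+ * nir_imm_float() (below) wraps nir_build_imm(), which inserts through
+ * nir_builder_instr_insert() and therefore advances the cursor past each new
+ * instruction.
+ */
+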
++static inline nir_ssa_def *
++nir_ssa_undef(nir_builder *build, unsigned num_components)
++{
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(build->shader, num_components);
++ if (!undef)
++ return NULL;
++
++ nir_instr_insert(nir_before_block(nir_start_block(build->impl)),
++ &undef->instr);
++
++ return &undef->def;
++}
++
+ static inline nir_ssa_def *
+ nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+ {
+ nir_load_const_instr *load_const =
+ nir_load_const_instr_create(build->shader, num_components);
+ if (!load_const)
+ return NULL;
+
+ load_const->value = value;
+
+ nir_builder_instr_insert(build, &load_const->instr);
+
+ return &load_const->def;
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_float(nir_builder *build, float x)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+
+ return nir_build_imm(build, 1, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+ v.f[1] = y;
+ v.f[2] = z;
+ v.f[3] = w;
+
+ return nir_build_imm(build, 4, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_int(nir_builder *build, int x)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.i[0] = x;
+
+ return nir_build_imm(build, 1, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
+ {
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.i[0] = x;
+ v.i[1] = y;
+ v.i[2] = z;
+ v.i[3] = w;
+
+ return nir_build_imm(build, 4, v);
+ }
+
+ static inline nir_ssa_def *
+ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
+ nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
+ {
+ const nir_op_info *op_info = &nir_op_infos[op];
+ nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
+ if (!instr)
+ return NULL;
+
+ instr->src[0].src = nir_src_for_ssa(src0);
+ if (src1)
+ instr->src[1].src = nir_src_for_ssa(src1);
+ if (src2)
+ instr->src[2].src = nir_src_for_ssa(src2);
+ if (src3)
+ instr->src[3].src = nir_src_for_ssa(src3);
+
+ /* Guess the number of components the destination temporary should have
+ * based on our input sizes, if it's not fixed for the op.
+ */
+ unsigned num_components = op_info->output_size;
+ if (num_components == 0) {
+ for (unsigned i = 0; i < op_info->num_inputs; i++) {
+ if (op_info->input_sizes[i] == 0)
+ num_components = MAX2(num_components,
+ instr->src[i].src.ssa->num_components);
+ }
+ }
+ assert(num_components != 0);
+
+ /* Make sure we don't swizzle from outside of our source vector (like if a
+ * scalar value was passed into a multiply with a vector).
+ */
+ for (unsigned i = 0; i < op_info->num_inputs; i++) {
+ for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) {
+ instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
+ }
+ }
+
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ instr->dest.write_mask = (1 << num_components) - 1;
+
+ nir_builder_instr_insert(build, &instr->instr);
+
+ return &instr->dest.dest.ssa;
+ }
+
+ #define ALU1(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \
+ }
+
+ #define ALU2(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \
+ }
+
+ #define ALU3(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0, \
+ nir_ssa_def *src1, nir_ssa_def *src2) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \
+ }
+
+ #define ALU4(op) \
+ static inline nir_ssa_def * \
+ nir_##op(nir_builder *build, nir_ssa_def *src0, \
+ nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \
+ { \
+ return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \
+ }
+
+ #include "nir_builder_opcodes.h"
+
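+ /* The generated header provides one builder per ALU opcode through the
+ * ALU1..ALU4 macros above, e.g. (sketch, assuming a ready builder `b` and
+ * nir_ssa_def's `x` and `y`):
+ *
+ *    nir_ssa_def *sum    = nir_fadd(&b, x, y);
+ *    nir_ssa_def *scaled = nir_fmul(&b, sum, nir_imm_float(&b, 0.5f));
+ */
+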
+ static inline nir_ssa_def *
+ nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+ {
+ switch (num_components) {
+ case 4:
+ return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+ case 3:
+ return nir_vec3(build, comp[0], comp[1], comp[2]);
+ case 2:
+ return nir_vec2(build, comp[0], comp[1]);
+ case 1:
+ return comp[0];
+ default:
+ unreachable("bad component count");
+ return NULL;
+ }
+ }
+
+ /**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+ static inline nir_ssa_def *
+ nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+ {
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+ }
+
+ static inline nir_ssa_def *
+ nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+ {
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+ }
+
+ /**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+ static inline nir_ssa_def *
+ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+ unsigned num_components, bool use_fmov)
+ {
+ nir_alu_src alu_src = { NIR_SRC_INIT };
+ alu_src.src = nir_src_for_ssa(src);
+ for (unsigned i = 0; i < num_components; i++)
+ alu_src.swizzle[i] = swiz[i];
+
+ return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+ nir_imov_alu(build, alu_src, num_components);
+ }
+
++/* Selects the right fdot given the number of components in each source. */
++static inline nir_ssa_def *
++nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
++{
++ assert(src0->num_components == src1->num_components);
++ switch (src0->num_components) {
++ case 1: return nir_fmul(build, src0, src1);
++ case 2: return nir_fdot2(build, src0, src1);
++ case 3: return nir_fdot3(build, src0, src1);
++ case 4: return nir_fdot4(build, src0, src1);
++ default:
++ unreachable("bad component size");
++ }
++
++ return NULL;
++}
++
+ static inline nir_ssa_def *
+ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+ {
+ unsigned swizzle[4] = {c, c, c, c};
+ return nir_swizzle(b, def, swizzle, 1, false);
+ }
+
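+ /* Sketch: nir_channel() above is the usual way to extract a single component,
+ * e.g. (assuming a builder `b` and a vector nir_ssa_def `vec`):
+ *
+ *    nir_ssa_def *x = nir_channel(&b, vec, 0);
+ */
+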
+ /**
+ * Turns a nir_src into a nir_ssa_def * so it can be passed to
+ * nir_build_alu()-based builder calls.
+ *
+ * See nir_ssa_for_alu_src() for alu instructions.
+ */
+ static inline nir_ssa_def *
+ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+ {
+ if (src.is_ssa && src.ssa->num_components == num_components)
+ return src.ssa;
+
+ nir_alu_src alu = { NIR_SRC_INIT };
+ alu.src = src;
+ for (int j = 0; j < 4; j++)
+ alu.swizzle[j] = j;
+
+ return nir_imov_alu(build, alu, num_components);
+ }
+
+ /**
+ * Similar to nir_ssa_for_src(), but for alu src's, respecting the
+ * nir_alu_src's swizzle.
+ */
+ static inline nir_ssa_def *
+ nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
+ {
+ static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 };
+ nir_alu_src *src = &instr->src[srcn];
+ unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
+
+ if (src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
+ !src->abs && !src->negate &&
+ (memcmp(src->swizzle, trivial_swizzle, num_components) == 0))
+ return src->src.ssa;
+
+ return nir_imov_alu(build, *src, num_components);
+ }
+
+ static inline nir_ssa_def *
+ nir_load_var(nir_builder *build, nir_variable *var)
+ {
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+ load->num_components = num_components;
+ load->variables[0] = nir_deref_var_create(load, var);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_builder_instr_insert(build, &load->instr);
+ return &load->dest.ssa;
+ }
+
+ static inline void
+ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
+ unsigned writemask)
+ {
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+ store->num_components = num_components;
+ store->const_index[0] = writemask;
+ store->variables[0] = nir_deref_var_create(store, var);
+ store->src[0] = nir_src_for_ssa(value);
+ nir_builder_instr_insert(build, &store->instr);
+ }
+
++static inline void
++nir_store_deref_var(nir_builder *build, nir_deref_var *deref,
++ nir_ssa_def *value, unsigned writemask)
++{
++ const unsigned num_components =
++ glsl_get_vector_elements(nir_deref_tail(&deref->deref)->type);
++
++ nir_intrinsic_instr *store =
++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
++ store->num_components = num_components;
++ store->const_index[0] = writemask & ((1 << num_components) - 1);
++ store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &deref->deref));
++ store->src[0] = nir_src_for_ssa(value);
++ nir_builder_instr_insert(build, &store->instr);
++}
++
++static inline void
++nir_copy_deref_var(nir_builder *build, nir_deref_var *dest, nir_deref_var *src)
++{
++ assert(nir_deref_tail(&dest->deref)->type ==
++ nir_deref_tail(&src->deref)->type);
++
++ nir_intrinsic_instr *copy =
++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var);
++ copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref));
++ copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref));
++ nir_builder_instr_insert(build, &copy->instr);
++}
++
++static inline void
++nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
++{
++ nir_intrinsic_instr *copy =
++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var);
++ copy->variables[0] = nir_deref_var_create(copy, dest);
++ copy->variables[1] = nir_deref_var_create(copy, src);
++ nir_builder_instr_insert(build, &copy->instr);
++}
++
+ static inline nir_ssa_def *
+ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
+ {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
+ load->num_components = nir_intrinsic_infos[op].dest_components;
+ load->const_index[0] = index;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ nir_intrinsic_infos[op].dest_components, NULL);
+ nir_builder_instr_insert(build, &load->instr);
+ return &load->dest.ssa;
+ }
+
++static inline void
++nir_jump(nir_builder *build, nir_jump_type jump_type)
++{
++ nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type);
++ nir_builder_instr_insert(build, &jump->instr);
++}
++
+ #endif /* NIR_BUILDER_H */
--- /dev/null
- struct hash_table *ptr_table;
+ /*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_control_flow_private.h"
+
+ /* Secret Decoder Ring:
+ * clone_foo():
+ * Allocate and clone a foo.
+ * __clone_foo():
+ * Clone body of foo (i.e. parent class, embedded struct, etc.)
+ */
+
+ typedef struct {
++ /* True if we are cloning an entire shader. */
++ bool global_clone;
++
+ /* maps orig ptr -> cloned ptr: */
-init_clone_state(clone_state *state)
++ struct hash_table *remap_table;
+
+ /* List of phi sources. */
+ struct list_head phi_srcs;
+
+ /* new shader object, used as memctx for just about everything else: */
+ nir_shader *ns;
+ } clone_state;
+
+ static void
- state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
++init_clone_state(clone_state *state, bool global)
+ {
- _mesa_hash_table_destroy(state->ptr_table, NULL);
++ state->global_clone = global;
++ state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
+ list_inithead(&state->phi_srcs);
+ }
+
+ static void
+ free_clone_state(clone_state *state)
+ {
-static void *
-lookup_ptr(clone_state *state, const void *ptr)
++ _mesa_hash_table_destroy(state->remap_table, NULL);
+ }
+
- entry = _mesa_hash_table_search(state->ptr_table, ptr);
++static inline void *
++_lookup_ptr(clone_state *state, const void *ptr, bool global)
+ {
+ struct hash_entry *entry;
+
+ if (!ptr)
+ return NULL;
+
-store_ptr(clone_state *state, void *nptr, const void *ptr)
++ if (!state->global_clone && global)
++ return (void *)ptr;
++
++ entry = _mesa_hash_table_search(state->remap_table, ptr);
+ assert(entry && "Failed to find pointer!");
+ if (!entry)
+ return NULL;
+
+ return entry->data;
+ }
+
+ static void
- _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
++add_remap(clone_state *state, void *nptr, const void *ptr)
++{
++ _mesa_hash_table_insert(state->remap_table, ptr, nptr);
++}
++
++static void *
++remap_local(clone_state *state, const void *ptr)
+ {
-static nir_constant *
-clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar)
++ return _lookup_ptr(state, ptr, false);
+ }
+
- nc->elements[i] = clone_constant(state, c->elements[i], nvar);
++static void *
++remap_global(clone_state *state, const void *ptr)
++{
++ return _lookup_ptr(state, ptr, true);
++}
++
++static nir_register *
++remap_reg(clone_state *state, const nir_register *reg)
++{
++ return _lookup_ptr(state, reg, reg->is_global);
++}
++
++static nir_variable *
++remap_var(clone_state *state, const nir_variable *var)
++{
++ return _lookup_ptr(state, var, var->data.mode != nir_var_local);
++}
++
++nir_constant *
++nir_constant_clone(const nir_constant *c, nir_variable *nvar)
+ {
+ nir_constant *nc = ralloc(nvar, nir_constant);
+
+ nc->value = c->value;
+ nc->num_elements = c->num_elements;
+ nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
+ for (unsigned i = 0; i < c->num_elements; i++) {
- store_ptr(state, nvar, var);
++ nc->elements[i] = nir_constant_clone(c->elements[i], nvar);
+ }
+
+ return nc;
+ }
+
+ /* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
+ * having to deal with locals and globals separately:
+ */
+ static nir_variable *
+ clone_variable(clone_state *state, const nir_variable *var)
+ {
+ nir_variable *nvar = rzalloc(state->ns, nir_variable);
- clone_constant(state, var->constant_initializer, nvar);
++ add_remap(state, nvar, var);
+
+ nvar->type = var->type;
+ nvar->name = ralloc_strdup(nvar, var->name);
+ nvar->data = var->data;
+ nvar->num_state_slots = var->num_state_slots;
+ nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
+ memcpy(nvar->state_slots, var->state_slots,
+ var->num_state_slots * sizeof(nir_state_slot));
+ if (var->constant_initializer) {
+ nvar->constant_initializer =
- store_ptr(state, nreg, reg);
++ nir_constant_clone(var->constant_initializer, nvar);
+ }
+ nvar->interface_type = var->interface_type;
+
+ return nvar;
+ }
+
+ /* clone list of nir_variable: */
+ static void
+ clone_var_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+ {
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_variable, var, node, list) {
+ nir_variable *nvar = clone_variable(state, var);
+ exec_list_push_tail(dst, &nvar->node);
+ }
+ }
+
+ /* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
+ * to avoid having to deal with locals and globals separately:
+ */
+ static nir_register *
+ clone_register(clone_state *state, const nir_register *reg)
+ {
+ nir_register *nreg = rzalloc(state->ns, nir_register);
- nsrc->ssa = lookup_ptr(state, src->ssa);
++ add_remap(state, nreg, reg);
+
+ nreg->num_components = reg->num_components;
+ nreg->num_array_elems = reg->num_array_elems;
+ nreg->index = reg->index;
+ nreg->name = ralloc_strdup(nreg, reg->name);
+ nreg->is_global = reg->is_global;
+ nreg->is_packed = reg->is_packed;
+
+ /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
+ list_inithead(&nreg->uses);
+ list_inithead(&nreg->defs);
+ list_inithead(&nreg->if_uses);
+
+ return nreg;
+ }
+
+ /* clone list of nir_register: */
+ static void
+ clone_reg_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+ {
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_register, reg, node, list) {
+ nir_register *nreg = clone_register(state, reg);
+ exec_list_push_tail(dst, &nreg->node);
+ }
+ }
+
+ static void
+ __clone_src(clone_state *state, void *ninstr_or_if,
+ nir_src *nsrc, const nir_src *src)
+ {
+ nsrc->is_ssa = src->is_ssa;
+ if (src->is_ssa) {
- nsrc->reg.reg = lookup_ptr(state, src->reg.reg);
++ nsrc->ssa = remap_local(state, src->ssa);
+ } else {
- store_ptr(state, &ndst->ssa, &dst->ssa);
++ nsrc->reg.reg = remap_reg(state, src->reg.reg);
+ if (src->reg.indirect) {
+ nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
+ __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
+ }
+ nsrc->reg.base_offset = src->reg.base_offset;
+ }
+ }
+
+ static void
+ __clone_dst(clone_state *state, nir_instr *ninstr,
+ nir_dest *ndst, const nir_dest *dst)
+ {
+ ndst->is_ssa = dst->is_ssa;
+ if (dst->is_ssa) {
+ nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
- ndst->reg.reg = lookup_ptr(state, dst->reg.reg);
++ add_remap(state, &ndst->ssa, &dst->ssa);
+ } else {
- nir_variable *nvar = lookup_ptr(state, dvar->var);
++ ndst->reg.reg = remap_reg(state, dst->reg.reg);
+ if (dst->reg.indirect) {
+ ndst->reg.indirect = ralloc(ninstr, nir_src);
+ __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
+ }
+ ndst->reg.base_offset = dst->reg.base_offset;
+ }
+ }
+
+ static nir_deref *clone_deref(clone_state *state, const nir_deref *deref,
+ nir_instr *ninstr, nir_deref *parent);
+
+ static nir_deref_var *
+ clone_deref_var(clone_state *state, const nir_deref_var *dvar,
+ nir_instr *ninstr)
+ {
- store_ptr(state, &nlc->def, &lc->def);
++ nir_variable *nvar = remap_var(state, dvar->var);
+ nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar);
+
+ if (dvar->deref.child)
+ ndvar->deref.child = clone_deref(state, dvar->deref.child,
+ ninstr, &ndvar->deref);
+
+ return ndvar;
+ }
+
+ static nir_deref_array *
+ clone_deref_array(clone_state *state, const nir_deref_array *darr,
+ nir_instr *ninstr, nir_deref *parent)
+ {
+ nir_deref_array *ndarr = nir_deref_array_create(parent);
+
+ ndarr->deref.type = darr->deref.type;
+ if (darr->deref.child)
+ ndarr->deref.child = clone_deref(state, darr->deref.child,
+ ninstr, &ndarr->deref);
+
+ ndarr->deref_array_type = darr->deref_array_type;
+ ndarr->base_offset = darr->base_offset;
+ if (ndarr->deref_array_type == nir_deref_array_type_indirect)
+ __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect);
+
+ return ndarr;
+ }
+
+ static nir_deref_struct *
+ clone_deref_struct(clone_state *state, const nir_deref_struct *dstr,
+ nir_instr *ninstr, nir_deref *parent)
+ {
+ nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index);
+
+ ndstr->deref.type = dstr->deref.type;
+ if (dstr->deref.child)
+ ndstr->deref.child = clone_deref(state, dstr->deref.child,
+ ninstr, &ndstr->deref);
+
+ return ndstr;
+ }
+
+ static nir_deref *
+ clone_deref(clone_state *state, const nir_deref *dref,
+ nir_instr *ninstr, nir_deref *parent)
+ {
+ switch (dref->deref_type) {
+ case nir_deref_type_array:
+ return &clone_deref_array(state, nir_deref_as_array(dref),
+ ninstr, parent)->deref;
+ case nir_deref_type_struct:
+ return &clone_deref_struct(state, nir_deref_as_struct(dref),
+ ninstr, parent)->deref;
+ default:
+ unreachable("bad deref type");
+ return NULL;
+ }
+ }
+
+ static nir_alu_instr *
+ clone_alu(clone_state *state, const nir_alu_instr *alu)
+ {
+ nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+
+ __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
+ nalu->dest.saturate = alu->dest.saturate;
+ nalu->dest.write_mask = alu->dest.write_mask;
+
+ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+ __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src);
+ nalu->src[i].negate = alu->src[i].negate;
+ nalu->src[i].abs = alu->src[i].abs;
+ memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+ sizeof(nalu->src[i].swizzle));
+ }
+
+ return nalu;
+ }
+
+ static nir_intrinsic_instr *
+ clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
+ {
+ nir_intrinsic_instr *nitr =
+ nir_intrinsic_instr_create(state->ns, itr->intrinsic);
+
+ unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables;
+ unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
+
+ if (nir_intrinsic_infos[itr->intrinsic].has_dest)
+ __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
+
+ nitr->num_components = itr->num_components;
+ memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ nitr->variables[i] = clone_deref_var(state, itr->variables[i],
+ &nitr->instr);
+ }
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
+
+ return nitr;
+ }
+
+ static nir_load_const_instr *
+ clone_load_const(clone_state *state, const nir_load_const_instr *lc)
+ {
+ nir_load_const_instr *nlc =
+ nir_load_const_instr_create(state->ns, lc->def.num_components);
+
+ memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
+
- store_ptr(state, &nsa->def, &sa->def);
++ add_remap(state, &nlc->def, &lc->def);
+
+ return nlc;
+ }
+
+ static nir_ssa_undef_instr *
+ clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
+ {
+ nir_ssa_undef_instr *nsa =
+ nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+
- ntex->sampler_array_size = tex->sampler_array_size;
++ add_remap(state, &nsa->def, &sa->def);
+
+ return nsa;
+ }
+
+ static nir_tex_instr *
+ clone_tex(clone_state *state, const nir_tex_instr *tex)
+ {
+ nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
+
+ ntex->sampler_dim = tex->sampler_dim;
+ ntex->dest_type = tex->dest_type;
+ ntex->op = tex->op;
+ __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
+ for (unsigned i = 0; i < ntex->num_srcs; i++) {
+ ntex->src[i].src_type = tex->src[i].src_type;
+ __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
+ }
+ ntex->coord_components = tex->coord_components;
+ ntex->is_array = tex->is_array;
+ ntex->is_shadow = tex->is_shadow;
+ ntex->is_new_style_shadow = tex->is_new_style_shadow;
+ memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
+ ntex->component = tex->component;
++ ntex->texture_index = tex->texture_index;
++ ntex->texture_array_size = tex->texture_array_size;
++ if (tex->texture)
++ ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr);
+ ntex->sampler_index = tex->sampler_index;
- nir_function *ncallee = lookup_ptr(state, call->callee);
+ if (tex->sampler)
+ ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr);
+
+ return ntex;
+ }
+
+ static nir_phi_instr *
+ clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
+ {
+ nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
+
+ __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
+
+ /* Cloning a phi node is a bit different from other instructions. The
+ * sources of phi instructions are the only time where we can use an SSA
+ * def before it is defined. In order to handle this, we just copy over
+ * the sources from the old phi instruction directly and then fix them up
+ * in a second pass once all the instructions in the function have been
+ * properly cloned.
+ *
+ * In order to ensure that the copied sources (which are the same as the
+ * old phi instruction's sources for now) don't get inserted into the old
+ * shader's use-def lists, we have to add the phi instruction *before* we
+ * set up its sources.
+ */
+ nir_instr_insert_after_block(nblk, &nphi->instr);
+
+ foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+ nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
+
+ /* Just copy the old source for now. */
+ memcpy(nsrc, src, sizeof(*src));
+
+ /* Since we're not letting nir_insert_instr handle use/def stuff for us,
+ * we have to set the parent_instr manually. It doesn't really matter
+ * when we do it, so we might as well do it here.
+ */
+ nsrc->src.parent_instr = &nphi->instr;
+
+ /* Stash it in the list of phi sources. We'll walk this list and fix up
+ * sources at the very end of clone_function_impl.
+ */
+ list_add(&nsrc->src.use_link, &state->phi_srcs);
+
+ exec_list_push_tail(&nphi->srcs, &nsrc->node);
+ }
+
+ return nphi;
+ }
+
+ static nir_jump_instr *
+ clone_jump(clone_state *state, const nir_jump_instr *jmp)
+ {
+ nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
+
+ return njmp;
+ }
+
+ static nir_call_instr *
+ clone_call(clone_state *state, const nir_call_instr *call)
+ {
- store_ptr(state, nblk, blk);
++ nir_function *ncallee = remap_global(state, call->callee);
+ nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
+
+ for (unsigned i = 0; i < ncall->num_params; i++)
+ ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
+
+ ncall->return_deref = clone_deref_var(state, call->return_deref,
+ &ncall->instr);
+
+ return ncall;
+ }
+
+ static nir_instr *
+ clone_instr(clone_state *state, const nir_instr *instr)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return &clone_alu(state, nir_instr_as_alu(instr))->instr;
+ case nir_instr_type_intrinsic:
+ return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
+ case nir_instr_type_load_const:
+ return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
+ case nir_instr_type_ssa_undef:
+ return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
+ case nir_instr_type_tex:
+ return &clone_tex(state, nir_instr_as_tex(instr))->instr;
+ case nir_instr_type_phi:
+ unreachable("Cannot clone phis with clone_instr");
+ case nir_instr_type_jump:
+ return &clone_jump(state, nir_instr_as_jump(instr))->instr;
+ case nir_instr_type_call:
+ return &clone_call(state, nir_instr_as_call(instr))->instr;
+ case nir_instr_type_parallel_copy:
+ unreachable("Cannot clone parallel copies");
+ default:
+ unreachable("bad instr type");
+ return NULL;
+ }
+ }
+
+ static nir_block *
+ clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
+ {
+ /* Don't actually create a new block. Just use the one from the tail of
+ * the list. NIR guarantees that the tail of the list is a block and that
+ * no two blocks are side-by-side in the IR; it should be empty.
+ */
+ nir_block *nblk =
+ exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
+ assert(nblk->cf_node.type == nir_cf_node_block);
+ assert(exec_list_is_empty(&nblk->instr_list));
+
+ /* We need this for phi sources */
-clone_function_impl(clone_state *state, const nir_function_impl *fi,
- nir_function *nfxn)
++ add_remap(state, nblk, blk);
+
+ nir_foreach_instr(blk, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ /* Phi instructions are a bit of a special case when cloning because
+ * we don't want inserting the instruction to automatically handle
+ * use/defs for us. Instead, we need to wait until all the
+ * blocks/instructions are in so that we can set their sources up.
+ */
+ clone_phi(state, nir_instr_as_phi(instr), nblk);
+ } else {
+ nir_instr *ninstr = clone_instr(state, instr);
+ nir_instr_insert_after_block(nblk, ninstr);
+ }
+ }
+
+ return nblk;
+ }
+
+ static void
+ clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list);
+
+ static nir_if *
+ clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
+ {
+ nir_if *ni = nir_if_create(state->ns);
+
+ __clone_src(state, ni, &ni->condition, &i->condition);
+
+ nir_cf_node_insert_end(cf_list, &ni->cf_node);
+
+ clone_cf_list(state, &ni->then_list, &i->then_list);
+ clone_cf_list(state, &ni->else_list, &i->else_list);
+
+ return ni;
+ }
+
+ static nir_loop *
+ clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
+ {
+ nir_loop *nloop = nir_loop_create(state->ns);
+
+ nir_cf_node_insert_end(cf_list, &nloop->cf_node);
+
+ clone_cf_list(state, &nloop->body, &loop->body);
+
+ return nloop;
+ }
+
+ /* clone list of nir_cf_node: */
+ static void
+ clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+ {
+ foreach_list_typed(nir_cf_node, cf, node, list) {
+ switch (cf->type) {
+ case nir_cf_node_block:
+ clone_block(state, dst, nir_cf_node_as_block(cf));
+ break;
+ case nir_cf_node_if:
+ clone_if(state, dst, nir_cf_node_as_if(cf));
+ break;
+ case nir_cf_node_loop:
+ clone_loop(state, dst, nir_cf_node_as_loop(cf));
+ break;
+ default:
+ unreachable("bad cf type");
+ }
+ }
+ }
+
+ static nir_function_impl *
- nir_function_impl *nfi = nir_function_impl_create(nfxn);
++clone_function_impl(clone_state *state, const nir_function_impl *fi)
+ {
- nfi->params[i] = lookup_ptr(state, fi->params[i]);
++ nir_function_impl *nfi = nir_function_impl_create_bare(state->ns);
+
+ clone_var_list(state, &nfi->locals, &fi->locals);
+ clone_reg_list(state, &nfi->registers, &fi->registers);
+ nfi->reg_alloc = fi->reg_alloc;
+
+ nfi->num_params = fi->num_params;
+ nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
+ for (unsigned i = 0; i < fi->num_params; i++) {
- nfi->return_var = lookup_ptr(state, fi->return_var);
++ nfi->params[i] = remap_local(state, fi->params[i]);
+ }
- src->pred = lookup_ptr(state, src->pred);
++ nfi->return_var = remap_local(state, fi->return_var);
+
+ assert(list_empty(&state->phi_srcs));
+
+ clone_cf_list(state, &nfi->body, &fi->body);
+
+ /* After we've cloned almost everything, we have to walk the list of phi
+ * sources and fix them up. Thanks to loops, the block and SSA value for a
+ * phi source may not be defined when we first encounter it. Instead, we
+ * add it to the phi_srcs list and we fix it up here.
+ */
+ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
- src->src.ssa = lookup_ptr(state, src->src.ssa);
++ src->pred = remap_local(state, src->pred);
+ assert(src->src.is_ssa);
- store_ptr(state, nfxn, fxn);
++ src->src.ssa = remap_local(state, src->src.ssa);
+
+ /* Remove from this list and place in the uses of the SSA def */
+ list_del(&src->src.use_link);
+ list_addtail(&src->src.use_link, &src->src.ssa->uses);
+ }
+ assert(list_empty(&state->phi_srcs));
+
+ /* All metadata is invalidated in the cloning process */
+ nfi->valid_metadata = 0;
+
+ return nfi;
+ }
+
++nir_function_impl *
++nir_function_impl_clone(const nir_function_impl *fi)
++{
++ clone_state state;
++ init_clone_state(&state, false);
++
++ /* We use the same shader */
++ state.ns = fi->function->shader;
++
++ nir_function_impl *nfi = clone_function_impl(&state, fi);
++
++ free_clone_state(&state);
++
++ return nfi;
++}
++
+ static nir_function *
+ clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
+ {
+ assert(ns == state->ns);
+ nir_function *nfxn = nir_function_create(ns, fxn->name);
+
+ /* Needed for call instructions */
- init_clone_state(&state);
++ add_remap(state, nfxn, fxn);
+
+ nfxn->num_params = fxn->num_params;
+ nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params);
+ memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params);
+
+ nfxn->return_type = fxn->return_type;
+
+ /* At first glance, it looks like we should clone the function_impl here.
+ * However, call instructions need to be able to reference at least the
+ * function and those will get processed as we clone the function_impls.
+ * We stop here and do function_impls as a second pass.
+ */
+
+ return nfxn;
+ }
+
+ nir_shader *
+ nir_shader_clone(void *mem_ctx, const nir_shader *s)
+ {
+ clone_state state;
- nir_function *nfxn = lookup_ptr(&state, fxn);
- clone_function_impl(&state, fxn->impl, nfxn);
++ init_clone_state(&state, true);
+
+ nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+ state.ns = ns;
+
+ clone_var_list(&state, &ns->uniforms, &s->uniforms);
+ clone_var_list(&state, &ns->inputs, &s->inputs);
+ clone_var_list(&state, &ns->outputs, &s->outputs);
++ clone_var_list(&state, &ns->shared, &s->shared);
+ clone_var_list(&state, &ns->globals, &s->globals);
+ clone_var_list(&state, &ns->system_values, &s->system_values);
+
+ /* Go through and clone functions */
+ foreach_list_typed(nir_function, fxn, node, &s->functions)
+ clone_function(&state, fxn, ns);
+
+ /* Only after all functions are cloned can we clone the actual function
+ * implementations. This is because nir_call_instrs need to be able to
+ * reference other functions, and we don't know what order the functions
+ * will have in the list.
+ */
+ nir_foreach_function(s, fxn) {
++ nir_function *nfxn = remap_global(&state, fxn);
++ nfxn->impl = clone_function_impl(&state, fxn->impl);
++ nfxn->impl->function = nfxn;
+ }
+
+ clone_reg_list(&state, &ns->registers, &s->registers);
+ ns->reg_alloc = s->reg_alloc;
+
+ ns->info = s->info;
+ ns->info.name = ralloc_strdup(ns, ns->info.name);
+ if (ns->info.label)
+ ns->info.label = ralloc_strdup(ns, ns->info.label);
+
+ ns->num_inputs = s->num_inputs;
+ ns->num_uniforms = s->num_uniforms;
+ ns->num_outputs = s->num_outputs;
++ ns->num_shared = s->num_shared;
+
+ free_clone_state(&state);
+
+ return ns;
+ }
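+
+ /* A minimal usage sketch of the entry point above, assuming the caller
+ * already has a valid nir_shader *shader; the helper name and the separate
+ * ralloc context are only illustrations of how the clone's lifetime can be
+ * managed independently of the original.
+ */
+ static nir_shader *
+ clone_into_new_context(const nir_shader *shader)
+ {
+ void *clone_ctx = ralloc_context(NULL);
+ return nir_shader_clone(clone_ctx, shader);
+ }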
--- /dev/null
- } else {
- assert(parent->type == nir_cf_node_loop);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir_control_flow_private.h"
+
+ /**
+ * \name Control flow modification
+ *
+ * These functions modify the control flow tree while keeping the control flow
+ * graph up-to-date. The invariants respected are:
+ * 1. Each then statement, else statement, or loop body must have at least one
+ * control flow node.
+ * 2. Each if-statement and loop must have one basic block before it and one
+ * after.
+ * 3. Two basic blocks cannot be directly next to each other.
+ * 4. If a basic block has a jump instruction, there must be only one and it
+ * must be at the end of the block.
+ * 5. The CFG must always be connected - this means that we must insert a fake
+ * CFG edge for loops with no break statement.
+ *
+ * The purpose of the second invariant is to give us places to insert code
+ * during GCM, as well as to eliminate the possibility of critical edges.
+ */
+ /*@{*/
+
+ static bool
+ block_ends_in_jump(nir_block *block)
+ {
+ return !exec_list_is_empty(&block->instr_list) &&
+ nir_block_last_instr(block)->type == nir_instr_type_jump;
+ }
+
+ static inline void
+ block_add_pred(nir_block *block, nir_block *pred)
+ {
+ _mesa_set_add(block->predecessors, pred);
+ }
+
+ static inline void
+ block_remove_pred(nir_block *block, nir_block *pred)
+ {
+ struct set_entry *entry = _mesa_set_search(block->predecessors, pred);
+
+ assert(entry);
+
+ _mesa_set_remove(block->predecessors, entry);
+ }
+
+ static void
+ link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
+ {
+ pred->successors[0] = succ1;
+ if (succ1 != NULL)
+ block_add_pred(succ1, pred);
+
+ pred->successors[1] = succ2;
+ if (succ2 != NULL)
+ block_add_pred(succ2, pred);
+ }
+
+ static void
+ unlink_blocks(nir_block *pred, nir_block *succ)
+ {
+ if (pred->successors[0] == succ) {
+ pred->successors[0] = pred->successors[1];
+ pred->successors[1] = NULL;
+ } else {
+ assert(pred->successors[1] == succ);
+ pred->successors[1] = NULL;
+ }
+
+ block_remove_pred(succ, pred);
+ }
+
+ static void
+ unlink_block_successors(nir_block *block)
+ {
+ if (block->successors[1] != NULL)
+ unlink_blocks(block, block->successors[1]);
+ if (block->successors[0] != NULL)
+ unlink_blocks(block, block->successors[0]);
+ }
+
+ static void
+ link_non_block_to_block(nir_cf_node *node, nir_block *block)
+ {
+ if (node->type == nir_cf_node_if) {
+ /*
+ * We're trying to link an if to a block after it; this just means linking
+ * the last block of the then and else branches.
+ */
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ nir_cf_node *last_then = nir_if_last_then_node(if_stmt);
+ assert(last_then->type == nir_cf_node_block);
+ nir_block *last_then_block = nir_cf_node_as_block(last_then);
+
+ nir_cf_node *last_else = nir_if_last_else_node(if_stmt);
+ assert(last_else->type == nir_cf_node_block);
+ nir_block *last_else_block = nir_cf_node_as_block(last_else);
+
+ if (!block_ends_in_jump(last_then_block)) {
+ unlink_block_successors(last_then_block);
+ link_blocks(last_then_block, block, NULL);
+ }
+
+ if (!block_ends_in_jump(last_else_block)) {
+ unlink_block_successors(last_else_block);
+ link_blocks(last_else_block, block, NULL);
+ }
+ } else {
+ assert(node->type == nir_cf_node_loop);
+
+ /*
+ * We can only get to this codepath if we're inserting a new loop, or
+ * at least a loop with no break statements; we can't insert break
+ * statements into a loop when we haven't inserted it into the CFG
+ * because we wouldn't know which block comes after the loop
+ * and therefore which block should be the successor of the block with
+ * the break. Therefore, we need to insert a fake edge (see invariant
+ * #5).
+ */
+
+ nir_loop *loop = nir_cf_node_as_loop(node);
+
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+
+ last_block->successors[1] = block;
+ block_add_pred(block, last_block);
+ }
+ }
+
+ static void
+ link_block_to_non_block(nir_block *block, nir_cf_node *node)
+ {
+ if (node->type == nir_cf_node_if) {
+ /*
+ * We're trying to link a block to an if after it; this just means linking
+ * the block to the first block of the then and else branches.
+ */
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ nir_cf_node *first_then = nir_if_first_then_node(if_stmt);
+ assert(first_then->type == nir_cf_node_block);
+ nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+ nir_cf_node *first_else = nir_if_first_else_node(if_stmt);
+ assert(first_else->type == nir_cf_node_block);
+ nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+ unlink_block_successors(block);
+ link_blocks(block, first_then_block, first_else_block);
+ } else {
+ /*
+ * For similar reasons as the corresponding case in
+ * link_non_block_to_block(), don't worry about whether the loop header has
+ * any predecessors that need to be unlinked.
+ */
+
+ assert(node->type == nir_cf_node_loop);
+
+ nir_loop *loop = nir_cf_node_as_loop(node);
+
+ nir_cf_node *loop_header = nir_loop_first_cf_node(loop);
+ assert(loop_header->type == nir_cf_node_block);
+ nir_block *loop_header_block = nir_cf_node_as_block(loop_header);
+
+ unlink_block_successors(block);
+ link_blocks(block, loop_header_block, NULL);
+ }
+
+ }
+
+ /**
+ * Replace a block's successor with a different one.
+ */
+ static void
+ replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ)
+ {
+ if (block->successors[0] == old_succ) {
+ block->successors[0] = new_succ;
+ } else {
+ assert(block->successors[1] == old_succ);
+ block->successors[1] = new_succ;
+ }
+
+ block_remove_pred(old_succ, block);
+ block_add_pred(new_succ, block);
+ }
+
+ /**
+ * Takes a basic block and inserts a new empty basic block before it, making its
+ * predecessors point to the new block. This essentially splits the block into
+ * an empty header and a body so that another non-block CF node can be inserted
+ * between the two. Note that this does *not* link the two basic blocks, so
+ * some kind of cleanup *must* be performed after this call.
+ */
+
+ static nir_block *
+ split_block_beginning(nir_block *block)
+ {
+ nir_block *new_block = nir_block_create(ralloc_parent(block));
+ new_block->cf_node.parent = block->cf_node.parent;
+ exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node);
+
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+ replace_successor(pred, block, new_block);
+ }
+
+ /* Any phi nodes must stay part of the new block, or else their
+ * sources will be messed up. This will reverse the order of the phis, but
+ * order shouldn't matter.
+ */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ exec_node_remove(&instr->node);
+ instr->block = new_block;
+ exec_list_push_head(&new_block->instr_list, &instr->node);
+ }
+
+ return new_block;
+ }
+
+ static void
+ rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred)
+ {
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src(phi, src) {
+ if (src->pred == old_pred) {
+ src->pred = new_pred;
+ break;
+ }
+ }
+ }
+ }
+
+ static void
+ insert_phi_undef(nir_block *block, nir_block *pred)
+ {
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(ralloc_parent(phi),
+ phi->dest.ssa.num_components);
+ nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
+ nir_phi_src *src = ralloc(phi, nir_phi_src);
+ src->pred = pred;
+ src->src.parent_instr = &phi->instr;
+ src->src.is_ssa = true;
+ src->src.ssa = &undef->def;
+
+ list_addtail(&src->src.use_link, &undef->def.uses);
+
+ exec_list_push_tail(&phi->srcs, &src->node);
+ }
+ }
+
+ /**
+ * Moves the successors of source to the successors of dest, leaving both
+ * successors of source NULL.
+ */
+
+ static void
+ move_successors(nir_block *source, nir_block *dest)
+ {
+ nir_block *succ1 = source->successors[0];
+ nir_block *succ2 = source->successors[1];
+
+ if (succ1) {
+ unlink_blocks(source, succ1);
+ rewrite_phi_preds(succ1, source, dest);
+ }
+
+ if (succ2) {
+ unlink_blocks(source, succ2);
+ rewrite_phi_preds(succ2, source, dest);
+ }
+
+ unlink_block_successors(dest);
+ link_blocks(dest, succ1, succ2);
+ }
+
+ /* Given a basic block with no successors that has been inserted into the
+ * control flow tree, gives it the successors it would normally have assuming
+ * it doesn't end in a jump instruction. Also inserts phi sources with undefs
+ * if necessary.
+ */
+ static void
+ block_add_normal_succs(nir_block *block)
+ {
+ if (exec_node_is_tail_sentinel(block->cf_node.node.next)) {
+ nir_cf_node *parent = block->cf_node.parent;
+ if (parent->type == nir_cf_node_if) {
+ nir_cf_node *next = nir_cf_node_next(parent);
+ assert(next->type == nir_cf_node_block);
+ nir_block *next_block = nir_cf_node_as_block(next);
+
+ link_blocks(block, next_block, NULL);
++ } else if (parent->type == nir_cf_node_loop) {
+ nir_loop *loop = nir_cf_node_as_loop(parent);
+
+ nir_cf_node *head = nir_loop_first_cf_node(loop);
+ assert(head->type == nir_cf_node_block);
+ nir_block *head_block = nir_cf_node_as_block(head);
+
+ link_blocks(block, head_block, NULL);
+ insert_phi_undef(head_block, block);
++ } else {
++ assert(parent->type == nir_cf_node_function);
++ nir_function_impl *impl = nir_cf_node_as_function(parent);
++ link_blocks(block, impl->end_block, NULL);
+ }
+ } else {
+ nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+ if (next->type == nir_cf_node_if) {
+ nir_if *next_if = nir_cf_node_as_if(next);
+
+ nir_cf_node *first_then = nir_if_first_then_node(next_if);
+ assert(first_then->type == nir_cf_node_block);
+ nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+ nir_cf_node *first_else = nir_if_first_else_node(next_if);
+ assert(first_else->type == nir_cf_node_block);
+ nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+ link_blocks(block, first_then_block, first_else_block);
+ } else {
+ assert(next->type == nir_cf_node_loop);
+ nir_loop *next_loop = nir_cf_node_as_loop(next);
+
+ nir_cf_node *first = nir_loop_first_cf_node(next_loop);
+ assert(first->type == nir_cf_node_block);
+ nir_block *first_block = nir_cf_node_as_block(first);
+
+ link_blocks(block, first_block, NULL);
+ insert_phi_undef(first_block, block);
+ }
+ }
+ }
+
+ static nir_block *
+ split_block_end(nir_block *block)
+ {
+ nir_block *new_block = nir_block_create(ralloc_parent(block));
+ new_block->cf_node.parent = block->cf_node.parent;
+ exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node);
+
+ if (block_ends_in_jump(block)) {
+ /* Figure out what successor the block would've had if it didn't end in a
+ * jump instruction, and give new_block that successor.
+ */
+ block_add_normal_succs(new_block);
+ } else {
+ move_successors(block, new_block);
+ }
+
+ return new_block;
+ }
+
+ static nir_block *
+ split_block_before_instr(nir_instr *instr)
+ {
+ assert(instr->type != nir_instr_type_phi);
+ nir_block *new_block = split_block_beginning(instr->block);
+
+ nir_foreach_instr_safe(instr->block, cur_instr) {
+ if (cur_instr == instr)
+ break;
+
+ exec_node_remove(&cur_instr->node);
+ cur_instr->block = new_block;
+ exec_list_push_tail(&new_block->instr_list, &cur_instr->node);
+ }
+
+ return new_block;
+ }
+
+ /* Splits a basic block at the point specified by the cursor. The "before" and
+ * "after" arguments are filled out with the blocks resulting from the split
+ * if non-NULL. Note that the "beginning" of the block is actually interpreted
+ * as before the first non-phi instruction, and it's illegal to split a block
+ * before a phi instruction.
+ */
+
+ static void
+ split_block_cursor(nir_cursor cursor,
+ nir_block **_before, nir_block **_after)
+ {
+ nir_block *before, *after;
+ switch (cursor.option) {
+ case nir_cursor_before_block:
+ after = cursor.block;
+ before = split_block_beginning(cursor.block);
+ break;
+
+ case nir_cursor_after_block:
+ before = cursor.block;
+ after = split_block_end(cursor.block);
+ break;
+
+ case nir_cursor_before_instr:
+ after = cursor.instr->block;
+ before = split_block_before_instr(cursor.instr);
+ break;
+
+ case nir_cursor_after_instr:
+ /* We lower this to split_block_before_instr() so that we can keep the
+ * after-a-jump-instr case contained to split_block_end().
+ */
+ if (nir_instr_is_last(cursor.instr)) {
+ before = cursor.instr->block;
+ after = split_block_end(cursor.instr->block);
+ } else {
+ after = cursor.instr->block;
+ before = split_block_before_instr(nir_instr_next(cursor.instr));
+ }
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ if (_before)
+ *_before = before;
+ if (_after)
+ *_after = after;
+ }
+
+ /**
+ * Inserts a non-basic block between two basic blocks and links them together.
+ */
+
+ static void
+ insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after)
+ {
+ node->parent = before->cf_node.parent;
+ exec_node_insert_after(&before->cf_node.node, &node->node);
+ link_block_to_non_block(before, node);
+ link_non_block_to_block(node, after);
+ }
+
+ /* walk up the control flow tree to find the innermost enclosed loop */
+ static nir_loop *
+ nearest_loop(nir_cf_node *node)
+ {
+ while (node->type != nir_cf_node_loop) {
+ node = node->parent;
+ }
+
+ return nir_cf_node_as_loop(node);
+ }
+
+ /*
+ * update the CFG after a jump instruction has been added to the end of a block
+ */
+
+ void
+ nir_handle_add_jump(nir_block *block)
+ {
+ nir_instr *instr = nir_block_last_instr(block);
+ nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+
+ unlink_block_successors(block);
+
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_metadata_preserve(impl, nir_metadata_none);
+
+ if (jump_instr->type == nir_jump_break ||
+ jump_instr->type == nir_jump_continue) {
+ nir_loop *loop = nearest_loop(&block->cf_node);
+
+ if (jump_instr->type == nir_jump_continue) {
+ nir_cf_node *first_node = nir_loop_first_cf_node(loop);
+ assert(first_node->type == nir_cf_node_block);
+ nir_block *first_block = nir_cf_node_as_block(first_node);
+ link_blocks(block, first_block, NULL);
+ } else {
+ nir_cf_node *after = nir_cf_node_next(&loop->cf_node);
+ assert(after->type == nir_cf_node_block);
+ nir_block *after_block = nir_cf_node_as_block(after);
+ link_blocks(block, after_block, NULL);
+
+ /* If we inserted a fake link, remove it */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+ if (last_block->successors[1] != NULL)
+ unlink_blocks(last_block, after_block);
+ }
+ } else {
+ assert(jump_instr->type == nir_jump_return);
+ link_blocks(block, impl->end_block, NULL);
+ }
+ }
+
+ static void
+ remove_phi_src(nir_block *block, nir_block *pred)
+ {
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src_safe(phi, src) {
+ if (src->pred == pred) {
+ list_del(&src->src.use_link);
+ exec_node_remove(&src->node);
+ }
+ }
+ }
+ }
+
+ /* Removes the successor of a block with a jump, and inserts a fake edge for
+ * infinite loops. Note that the jump to be eliminated may be free-floating.
+ */
+
+ static void
+ unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
+ {
+ nir_block *next = block->successors[0];
+
+ if (block->successors[0])
+ remove_phi_src(block->successors[0], block);
+ if (block->successors[1])
+ remove_phi_src(block->successors[1], block);
+
+ unlink_block_successors(block);
+ if (add_normal_successors)
+ block_add_normal_succs(block);
+
+ /* If we've just removed a break, and the block we were jumping to (after
+ * the loop) now has zero predecessors, we've created a new infinite loop.
+ *
+ * NIR doesn't allow blocks (other than the start block) to have zero
+ * predecessors. In particular, dominance assumes all blocks are reachable.
+ * So, we insert a "fake link" by making successors[1] point after the loop.
+ *
+ * Note that we have to do this after unlinking/recreating the block's
+ * successors. If we removed a "break" at the end of the loop, then
+ * block == last_block, so block->successors[0] would already be "next",
+ * and adding a fake link would create two identical successors. Doing
+ * this afterward works, as we'll have changed block->successors[0] to
+ * be the top of the loop.
+ */
+ if (type == nir_jump_break && next->predecessors->entries == 0) {
+ nir_loop *loop =
+ nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
+
+ /* insert fake link */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+
+ last_block->successors[1] = next;
+ block_add_pred(next, last_block);
+ }
+ }
+
+ void
+ nir_handle_remove_jump(nir_block *block, nir_jump_type type)
+ {
+ unlink_jump(block, type, true);
+
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_metadata_preserve(impl, nir_metadata_none);
+ }
+
+ static void
+ update_if_uses(nir_cf_node *node)
+ {
+ if (node->type != nir_cf_node_if)
+ return;
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ if_stmt->condition.parent_if = if_stmt;
+ if (if_stmt->condition.is_ssa) {
+ list_addtail(&if_stmt->condition.use_link,
+ &if_stmt->condition.ssa->if_uses);
+ } else {
+ list_addtail(&if_stmt->condition.use_link,
+ &if_stmt->condition.reg.reg->if_uses);
+ }
+ }
+
+ /**
+ * Stitch two basic blocks together into one. The aggregate must have the same
+ * predecessors as the first and the same successors as the second.
+ */
+
+ static void
+ stitch_blocks(nir_block *before, nir_block *after)
+ {
+ /*
+ * We move after into before, so we have to deal with up to 2 successors vs.
+ * possibly a large number of predecessors.
+ *
+ * TODO: special case when before is empty and after isn't?
+ */
+
+ if (block_ends_in_jump(before)) {
+ assert(exec_list_is_empty(&after->instr_list));
+ if (after->successors[0])
+ remove_phi_src(after->successors[0], after);
+ if (after->successors[1])
+ remove_phi_src(after->successors[1], after);
+ unlink_block_successors(after);
+ exec_node_remove(&after->cf_node.node);
+ } else {
+ move_successors(after, before);
+
+ foreach_list_typed(nir_instr, instr, node, &after->instr_list) {
+ instr->block = before;
+ }
+
+ exec_list_append(&before->instr_list, &after->instr_list);
+ exec_node_remove(&after->cf_node.node);
+ }
+ }
+
+ void
+ nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node)
+ {
+ nir_block *before, *after;
+
+ split_block_cursor(cursor, &before, &after);
+
+ if (node->type == nir_cf_node_block) {
+ nir_block *block = nir_cf_node_as_block(node);
+ exec_node_insert_after(&before->cf_node.node, &block->cf_node.node);
+ block->cf_node.parent = before->cf_node.parent;
+ /* stitch_blocks() assumes that any block that ends with a jump has
+ * already been set up with the correct successors, so we need to set
+ * up jumps here as the block is being inserted.
+ */
+ if (block_ends_in_jump(block))
+ nir_handle_add_jump(block);
+
+ stitch_blocks(block, after);
+ stitch_blocks(before, block);
+ } else {
+ update_if_uses(node);
+ insert_non_block(before, node, after);
+ }
+ }
+
+ static bool
+ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
+ {
+ nir_function_impl *impl = void_impl;
+ void *mem_ctx = ralloc_parent(impl);
+
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(mem_ctx, def->num_components);
+ nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
+ return true;
+ }
+
+ static void
+ cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl)
+ {
+ switch (node->type) {
+ case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(node);
+ /* We need to walk the instructions and clean up defs/uses */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_jump) {
+ nir_jump_type jump_type = nir_instr_as_jump(instr)->type;
+ unlink_jump(block, jump_type, false);
+ } else {
+ nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl);
+ nir_instr_remove(instr);
+ }
+ }
+ break;
+ }
+
+ case nir_cf_node_if: {
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+ foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+ cleanup_cf_node(child, impl);
+ foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+ cleanup_cf_node(child, impl);
+
+ list_del(&if_stmt->condition.use_link);
+ break;
+ }
+
+ case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(node);
+ foreach_list_typed(nir_cf_node, child, node, &loop->body)
+ cleanup_cf_node(child, impl);
+ break;
+ }
+ case nir_cf_node_function: {
+ nir_function_impl *impl = nir_cf_node_as_function(node);
+ foreach_list_typed(nir_cf_node, child, node, &impl->body)
+ cleanup_cf_node(child, impl);
+ break;
+ }
+ default:
+ unreachable("Invalid CF node type");
+ }
+ }
+
+ void
+ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end)
+ {
+ nir_block *block_begin, *block_end, *block_before, *block_after;
+
++ if (nir_cursors_equal(begin, end)) {
++ exec_list_make_empty(&extracted->list);
++ extracted->impl = NULL; /* we shouldn't need this */
++ return;
++ }
++
+ /* In the case where begin points to an instruction in some basic block and
+ * end points to the end of the same basic block, we rely on the fact that
+ * splitting on an instruction moves earlier instructions into a new basic
+ * block. If the later instructions were moved instead, then the end cursor
+ * would be pointing to the same place that begin used to point to, which
+ * is obviously not what we want.
+ */
+ split_block_cursor(begin, &block_before, &block_begin);
+ split_block_cursor(end, &block_end, &block_after);
+
+ extracted->impl = nir_cf_node_get_function(&block_begin->cf_node);
+ exec_list_make_empty(&extracted->list);
+
+ /* Dominance and other block-related information is toast. */
+ nir_metadata_preserve(extracted->impl, nir_metadata_none);
+
+ nir_cf_node *cf_node = &block_begin->cf_node;
+ nir_cf_node *cf_node_end = &block_end->cf_node;
+ while (true) {
+ nir_cf_node *next = nir_cf_node_next(cf_node);
+
+ exec_node_remove(&cf_node->node);
+ cf_node->parent = NULL;
+ exec_list_push_tail(&extracted->list, &cf_node->node);
+
+ if (cf_node == cf_node_end)
+ break;
+
+ cf_node = next;
+ }
+
+ stitch_blocks(block_before, block_after);
+ }
+
+ void
+ nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor)
+ {
+ nir_block *before, *after;
+
++ if (exec_list_is_empty(&cf_list->list))
++ return;
++
+ split_block_cursor(cursor, &before, &after);
+
+ foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) {
+ exec_node_remove(&node->node);
+ node->parent = before->cf_node.parent;
+ exec_node_insert_node_before(&after->cf_node.node, &node->node);
+ }
+
+ stitch_blocks(before,
+ nir_cf_node_as_block(nir_cf_node_next(&before->cf_node)));
+ stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)),
+ after);
+ }
+
+ void
+ nir_cf_delete(nir_cf_list *cf_list)
+ {
+ foreach_list_typed(nir_cf_node, node, node, &cf_list->list) {
+ cleanup_cf_node(node, cf_list->impl);
+ }
+ }
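+
+ /* A minimal sketch of how the extract/reinsert pair above is typically
+ * used, assuming a caller that wants to move a loop's body to some other
+ * cursor; the helper name and "target" are placeholders for illustration.
+ */
+ static void
+ move_loop_body(nir_loop *loop, nir_cursor target)
+ {
+ nir_cf_list body;
+ nir_cf_list_extract(&body, &loop->body);
+ nir_cf_reinsert(&body, target);
+ }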
--- /dev/null
- assert(new_idom);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+
+ /*
+ * Implements the algorithms for computing the dominance tree and the
+ * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper,
+ * Harvey, and Kennedy.
+ */
+
+ typedef struct {
+ nir_function_impl *impl;
+ bool progress;
+ } dom_state;
+
+ static bool
+ init_block_cb(nir_block *block, void *_state)
+ {
+ dom_state *state = (dom_state *) _state;
+ if (block == nir_start_block(state->impl))
+ block->imm_dom = block;
+ else
+ block->imm_dom = NULL;
+ block->num_dom_children = 0;
+
+ struct set_entry *entry;
+ set_foreach(block->dom_frontier, entry) {
+ _mesa_set_remove(block->dom_frontier, entry);
+ }
+
+ return true;
+ }
+
+ static nir_block *
+ intersect(nir_block *b1, nir_block *b2)
+ {
+ while (b1 != b2) {
+ /*
+ * Note, the comparisons here are the opposite of what the paper says
+ * because we index blocks from beginning -> end (i.e. reverse
+ * post-order) instead of post-order like they assume.
+ */
+ while (b1->index > b2->index)
+ b1 = b1->imm_dom;
+ while (b2->index > b1->index)
+ b2 = b2->imm_dom;
+ }
+
+ return b1;
+ }
+
+ static bool
+ calc_dominance_cb(nir_block *block, void *_state)
+ {
+ dom_state *state = (dom_state *) _state;
+ if (block == nir_start_block(state->impl))
+ return true;
+
+ nir_block *new_idom = NULL;
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+
+ if (pred->imm_dom) {
+ if (new_idom)
+ new_idom = intersect(pred, new_idom);
+ else
+ new_idom = pred;
+ }
+ }
+
+ if (block->imm_dom != new_idom) {
+ block->imm_dom = new_idom;
+ state->progress = true;
+ }
+
+ return true;
+ }
+
+ static bool
+ calc_dom_frontier_cb(nir_block *block, void *state)
+ {
+ (void) state;
+
+ if (block->predecessors->entries > 1) {
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *runner = (nir_block *) entry->key;
++
++ /* Skip unreachable predecessors */
++ if (runner->imm_dom == NULL)
++ continue;
++
+ while (runner != block->imm_dom) {
+ _mesa_set_add(runner->dom_frontier, block);
+ runner = runner->imm_dom;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ /*
+ * Compute each node's children in the dominance tree from the immediate
+ * dominator information. We do this in three stages:
+ *
+ * 1. Calculate the number of children each node has
+ * 2. Allocate arrays, setting the number of children to 0 again
+ * 3. For each node, add itself to its parent's list of children, using
+ * num_dom_children as an index - at the end of this step, num_dom_children
+ * for each node will be the same as it was at the end of step #1.
+ */
+
+ static bool
+ block_count_children(nir_block *block, void *state)
+ {
+ (void) state;
+
+ if (block->imm_dom)
+ block->imm_dom->num_dom_children++;
+
+ return true;
+ }
+
+ static bool
+ block_alloc_children(nir_block *block, void *state)
+ {
+ void *mem_ctx = state;
+
+ block->dom_children = ralloc_array(mem_ctx, nir_block *,
+ block->num_dom_children);
+ block->num_dom_children = 0;
+
+ return true;
+ }
+
+ static bool
+ block_add_child(nir_block *block, void *state)
+ {
+ (void) state;
+
+ if (block->imm_dom)
+ block->imm_dom->dom_children[block->imm_dom->num_dom_children++] = block;
+
+ return true;
+ }
+
+ static void
+ calc_dom_children(nir_function_impl* impl)
+ {
+ void *mem_ctx = ralloc_parent(impl);
+
+ nir_foreach_block(impl, block_count_children, NULL);
+ nir_foreach_block(impl, block_alloc_children, mem_ctx);
+ nir_foreach_block(impl, block_add_child, NULL);
+ }
+
+ static void
+ calc_dfs_indicies(nir_block *block, unsigned *index)
+ {
+ block->dom_pre_index = (*index)++;
+
+ for (unsigned i = 0; i < block->num_dom_children; i++)
+ calc_dfs_indicies(block->dom_children[i], index);
+
+ block->dom_post_index = (*index)++;
+ }
+
+ void
+ nir_calc_dominance_impl(nir_function_impl *impl)
+ {
+ if (impl->valid_metadata & nir_metadata_dominance)
+ return;
+
+ nir_metadata_require(impl, nir_metadata_block_index);
+
+ dom_state state;
+ state.impl = impl;
+ state.progress = true;
+
+ nir_foreach_block(impl, init_block_cb, &state);
+
+ while (state.progress) {
+ state.progress = false;
+ nir_foreach_block(impl, calc_dominance_cb, &state);
+ }
+
+ nir_foreach_block(impl, calc_dom_frontier_cb, &state);
+
+ nir_block *start_block = nir_start_block(impl);
+ start_block->imm_dom = NULL;
+
+ calc_dom_children(impl);
+
+ unsigned dfs_index = 0;
+ calc_dfs_indicies(start_block, &dfs_index);
+ }
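+
+ /* A minimal sketch of how passes normally pull this analysis in, assuming
+ * an impl they are already operating on; the helper name is a placeholder.
+ * Requesting the metadata only recomputes dominance when it is not already
+ * valid (see the early return above).
+ */
+ static void
+ require_dominance(nir_function_impl *impl)
+ {
+ nir_metadata_require(impl, nir_metadata_dominance);
+ }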
+
+ void
+ nir_calc_dominance(nir_shader *shader)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_calc_dominance_impl(function->impl);
+ }
+ }
+
+ /**
+ * Computes the least common ancestor of two blocks. If one of the blocks
+ * is null, the other block is returned.
+ */
+ nir_block *
+ nir_dominance_lca(nir_block *b1, nir_block *b2)
+ {
+ if (b1 == NULL)
+ return b2;
+
+ if (b2 == NULL)
+ return b1;
+
+ assert(nir_cf_node_get_function(&b1->cf_node) ==
+ nir_cf_node_get_function(&b2->cf_node));
+
+ assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
+ nir_metadata_dominance);
+
+ return intersect(b1, b2);
+ }
+
+ /**
+ * Returns true if parent dominates child
+ */
+ bool
+ nir_block_dominates(nir_block *parent, nir_block *child)
+ {
+ assert(nir_cf_node_get_function(&parent->cf_node) ==
+ nir_cf_node_get_function(&child->cf_node));
+
+ assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
+ nir_metadata_dominance);
+
+ return child->dom_pre_index >= parent->dom_pre_index &&
+ child->dom_post_index <= parent->dom_post_index;
+ }
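+
+ /* An illustrative example with made-up DFS numbers: if the parent was
+ * visited with dom_pre_index = 2 and dom_post_index = 9, then a child with
+ * dom_pre_index = 4 and dom_post_index = 5 has its [pre, post] interval
+ * nested inside the parent's, so the test above returns true.
+ */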
+
+ static bool
+ dump_block_dom(nir_block *block, void *state)
+ {
+ FILE *fp = state;
+ if (block->imm_dom)
+ fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index);
+ return true;
+ }
+
+ void
+ nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
+ {
+ fprintf(fp, "digraph doms_%s {\n", impl->function->name);
+ nir_foreach_block(impl, dump_block_dom, fp);
+ fprintf(fp, "}\n\n");
+ }
+
+ void
+ nir_dump_dom_tree(nir_shader *shader, FILE *fp)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_dump_dom_tree_impl(function->impl, fp);
+ }
+ }
+
+ static bool
+ dump_block_dom_frontier(nir_block *block, void *state)
+ {
+ FILE *fp = state;
+
+ fprintf(fp, "DF(%u) = {", block->index);
+ struct set_entry *entry;
+ set_foreach(block->dom_frontier, entry) {
+ nir_block *df = (nir_block *) entry->key;
+ fprintf(fp, "%u, ", df->index);
+ }
+ fprintf(fp, "}\n");
+ return true;
+ }
+
+ void
+ nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
+ {
+ nir_foreach_block(impl, dump_block_dom_frontier, fp);
+ }
+
+ void
+ nir_dump_dom_frontier(nir_shader *shader, FILE *fp)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_dump_dom_frontier_impl(function->impl, fp);
+ }
+ }
+
+ static bool
+ dump_block_succs(nir_block *block, void *state)
+ {
+ FILE *fp = state;
+ if (block->successors[0])
+ fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index);
+ if (block->successors[1])
+ fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index);
+ return true;
+ }
+
+ void
+ nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
+ {
+ fprintf(fp, "digraph cfg_%s {\n", impl->function->name);
+ nir_foreach_block(impl, dump_block_succs, fp);
+ fprintf(fp, "}\n\n");
+ }
+
+ void
+ nir_dump_cfg(nir_shader *shader, FILE *fp)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_dump_cfg_impl(function->impl, fp);
+ }
+ }
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++
++static void
++gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
++{
++ switch (instr->intrinsic) {
++ case nir_intrinsic_discard:
++ assert(shader->stage == MESA_SHADER_FRAGMENT);
++ shader->info.fs.uses_discard = true;
++ break;
++
++ case nir_intrinsic_load_front_face:
++ case nir_intrinsic_load_vertex_id:
++ case nir_intrinsic_load_vertex_id_zero_base:
++ case nir_intrinsic_load_base_vertex:
++ case nir_intrinsic_load_instance_id:
++ case nir_intrinsic_load_sample_id:
++ case nir_intrinsic_load_sample_pos:
++ case nir_intrinsic_load_sample_mask_in:
++ case nir_intrinsic_load_primitive_id:
++ case nir_intrinsic_load_invocation_id:
++ case nir_intrinsic_load_local_invocation_id:
++ case nir_intrinsic_load_work_group_id:
++ case nir_intrinsic_load_num_work_groups:
++ shader->info.system_values_read |=
++ (1 << nir_system_value_from_intrinsic(instr->intrinsic));
++ break;
++
++ case nir_intrinsic_end_primitive:
++ case nir_intrinsic_end_primitive_with_counter:
++ assert(shader->stage == MESA_SHADER_GEOMETRY);
++ shader->info.gs.uses_end_primitive = 1;
++ break;
++
++ default:
++ break;
++ }
++}
++
++static void
++gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
++{
++ if (instr->op == nir_texop_tg4)
++ shader->info.uses_texture_gather = true;
++}
++
++static bool
++gather_info_block(nir_block *block, void *shader)
++{
++ nir_foreach_instr(block, instr) {
++ switch (instr->type) {
++ case nir_instr_type_intrinsic:
++ gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader);
++ break;
++ case nir_instr_type_tex:
++ gather_tex_info(nir_instr_as_tex(instr), shader);
++ break;
++ case nir_instr_type_call:
++ assert(!"nir_shader_gather_info only works if functions are inlined");
++ break;
++ default:
++ break;
++ }
++ }
++
++ return true;
++}
++
++void
++nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
++{
++ shader->info.inputs_read = 0;
++ foreach_list_typed(nir_variable, var, node, &shader->inputs)
++ shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage);
++
++ /* TODO: Some day we may need to add stream support to NIR */
++ shader->info.outputs_written = 0;
++ foreach_list_typed(nir_variable, var, node, &shader->outputs)
++ shader->info.outputs_written |= nir_variable_get_io_mask(var, shader->stage);
++
++ shader->info.system_values_read = 0;
++ foreach_list_typed(nir_variable, var, node, &shader->system_values)
++ shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage);
++
++ nir_foreach_block(entrypoint, gather_info_block, shader);
++}
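++
++/* A minimal usage sketch, assuming all functions have already been inlined
++ * and that the entry point is the function named "main"; the helper name and
++ * the lookup by name are only illustrations.
++ */
++static void
++gather_info_for_main(nir_shader *shader)
++{
++ nir_foreach_function(shader, func) {
++ if (func->impl && strcmp(func->name, "main") == 0)
++ nir_shader_gather_info(shader, func->impl);
++ }
++}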
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_builder.h"
++#include "nir_control_flow.h"
++
++struct inline_functions_state {
++ struct set *inlined;
++ nir_builder builder;
++ bool progress;
++};
++
++static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
++
++static bool
++inline_functions_block(nir_block *block, void *void_state)
++{
++ struct inline_functions_state *state = void_state;
++
++ nir_builder *b = &state->builder;
++
++ /* This is tricky. We're iterating over instructions in a block but, as
++ * we go, the block and its instruction list are being split into
++ * pieces. However, this *should* be safe since foreach_safe always
++ * stashes the next thing in the iteration. That next thing will
++ * properly get moved to the next block when it gets split, and we
++ * continue iterating there.
++ */
++ nir_foreach_instr_safe(block, instr) {
++ if (instr->type != nir_instr_type_call)
++ continue;
++
++ state->progress = true;
++
++ nir_call_instr *call = nir_instr_as_call(instr);
++ assert(call->callee->impl);
++
++ inline_function_impl(call->callee->impl, state->inlined);
++
++ nir_function_impl *callee_copy =
++ nir_function_impl_clone(call->callee->impl);
++
++ exec_list_append(&b->impl->locals, &callee_copy->locals);
++ exec_list_append(&b->impl->registers, &callee_copy->registers);
++
++ b->cursor = nir_before_instr(&call->instr);
++
++ /* Add copies of all in parameters */
++ assert(call->num_params == callee_copy->num_params);
++ for (unsigned i = 0; i < callee_copy->num_params; i++) {
++ /* Only in or inout parameters */
++ if (call->callee->params[i].param_type == nir_parameter_out)
++ continue;
++
++ nir_copy_deref_var(b, nir_deref_var_create(b->shader,
++ callee_copy->params[i]),
++ call->params[i]);
++ }
++
++ /* Pluck the body out of the function and place it here */
++ nir_cf_list body;
++ nir_cf_list_extract(&body, &callee_copy->body);
++ nir_cf_reinsert(&body, b->cursor);
++
++ b->cursor = nir_before_instr(&call->instr);
++
++ /* Add copies of all out parameters and the return */
++ assert(call->num_params == callee_copy->num_params);
++ for (unsigned i = 0; i < callee_copy->num_params; i++) {
++ /* Only out or inout parameters */
++ if (call->callee->params[i].param_type == nir_parameter_in)
++ continue;
++
++ nir_copy_deref_var(b, call->params[i],
++ nir_deref_var_create(b->shader,
++ callee_copy->params[i]));
++ }
++ if (!glsl_type_is_void(call->callee->return_type)) {
++ nir_copy_deref_var(b, call->return_deref,
++ nir_deref_var_create(b->shader,
++ callee_copy->return_var));
++ }
++
++ nir_instr_remove(&call->instr);
++ }
++
++ return true;
++}
++
++static bool
++inline_function_impl(nir_function_impl *impl, struct set *inlined)
++{
++ if (_mesa_set_search(inlined, impl))
++ return false; /* Already inlined */
++
++ struct inline_functions_state state;
++
++ state.inlined = inlined;
++ state.progress = false;
++ nir_builder_init(&state.builder, impl);
++
++ nir_foreach_block(impl, inline_functions_block, &state);
++
++ if (state.progress) {
++ /* SSA and register indices are completely messed up now */
++ nir_index_ssa_defs(impl);
++ nir_index_local_regs(impl);
++
++ nir_metadata_preserve(impl, nir_metadata_none);
++ }
++
++ _mesa_set_add(inlined, impl);
++
++ return state.progress;
++}
++
++bool
++nir_inline_functions(nir_shader *shader)
++{
++ struct set *inlined = _mesa_set_create(NULL, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = inline_function_impl(function->impl, inlined) || progress;
++ }
++
++ _mesa_set_destroy(inlined, NULL);
++
++ return progress;
++}
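++
++/* Note on usage: because inline_function_impl() recurses into callees before
++ * inlining them into their callers, a single nir_inline_functions() call is
++ * enough to flatten an acyclic call graph; repeated passes are not needed.
++ */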
--- /dev/null
- hash = HASH(hash, instr->sampler_array_size);
+ /*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir_instr_set.h"
+ #include "nir_vla.h"
+
+ #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
+
+ static uint32_t
+ hash_src(uint32_t hash, const nir_src *src)
+ {
+ assert(src->is_ssa);
+ hash = HASH(hash, src->ssa);
+ return hash;
+ }
+
+ static uint32_t
+ hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components)
+ {
+ hash = HASH(hash, src->abs);
+ hash = HASH(hash, src->negate);
+
+ for (unsigned i = 0; i < num_components; i++)
+ hash = HASH(hash, src->swizzle[i]);
+
+ hash = hash_src(hash, &src->src);
+ return hash;
+ }
+
+ static uint32_t
+ hash_alu(uint32_t hash, const nir_alu_instr *instr)
+ {
+ hash = HASH(hash, instr->op);
+ hash = HASH(hash, instr->dest.dest.ssa.num_components);
+
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
+ uint32_t hash0 = hash_alu_src(hash, &instr->src[0],
+ nir_ssa_alu_instr_src_components(instr, 0));
+ uint32_t hash1 = hash_alu_src(hash, &instr->src[1],
+ nir_ssa_alu_instr_src_components(instr, 1));
+ /* For commutative operations, we need some commutative way of
+ * combining the hashes. One option would be to XOR them but that
+ * means that anything with two identical sources will hash to 0 and
+ * that's common enough that we probably don't want the guaranteed
+ * collision. Either addition or multiplication will also work.
+ */
+ hash = hash0 * hash1;
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ hash = hash_alu_src(hash, &instr->src[i],
+ nir_ssa_alu_instr_src_components(instr, i));
+ }
+ }
+
+ return hash;
+ }
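+
+ /* An illustrative example of the combining choice above: if both sources
+ * happen to hash to the same value h, then h ^ h is always 0 (a guaranteed
+ * collision), while h * h and h + h still depend on h.
+ */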
+
+ static uint32_t
+ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
+ {
+ hash = HASH(hash, instr->def.num_components);
+
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
+ instr->def.num_components
+ * sizeof(instr->value.f[0]));
+
+ return hash;
+ }
+
+ static int
+ cmp_phi_src(const void *data1, const void *data2)
+ {
+ nir_phi_src *src1 = *(nir_phi_src **)data1;
+ nir_phi_src *src2 = *(nir_phi_src **)data2;
+ return src1->pred - src2->pred;
+ }
+
+ static uint32_t
+ hash_phi(uint32_t hash, const nir_phi_instr *instr)
+ {
+ hash = HASH(hash, instr->instr.block);
+
+ /* sort sources by predecessor, since the order shouldn't matter */
+ unsigned num_preds = instr->instr.block->predecessors->entries;
+ NIR_VLA(nir_phi_src *, srcs, num_preds);
+ unsigned i = 0;
+ nir_foreach_phi_src(instr, src) {
+ srcs[i++] = src;
+ }
+
+ qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src);
+
+ for (i = 0; i < num_preds; i++) {
+ hash = hash_src(hash, &srcs[i]->src);
+ hash = HASH(hash, srcs[i]->pred);
+ }
+
+ return hash;
+ }
+
+ static uint32_t
+ hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
+ {
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ hash = HASH(hash, instr->intrinsic);
+
+ if (info->has_dest)
+ hash = HASH(hash, instr->dest.ssa.num_components);
+
+ assert(info->num_variables == 0);
+
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index,
+ info->num_indices
+ * sizeof(instr->const_index[0]));
+ return hash;
+ }
+
+ static uint32_t
+ hash_tex(uint32_t hash, const nir_tex_instr *instr)
+ {
+ hash = HASH(hash, instr->op);
+ hash = HASH(hash, instr->num_srcs);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ hash = HASH(hash, instr->src[i].src_type);
+ hash = hash_src(hash, &instr->src[i].src);
+ }
+
+ hash = HASH(hash, instr->coord_components);
+ hash = HASH(hash, instr->sampler_dim);
+ hash = HASH(hash, instr->is_array);
+ hash = HASH(hash, instr->is_shadow);
+ hash = HASH(hash, instr->is_new_style_shadow);
+ hash = HASH(hash, instr->const_offset);
+ unsigned component = instr->component;
+ hash = HASH(hash, component);
++ hash = HASH(hash, instr->texture_index);
++ hash = HASH(hash, instr->texture_array_size);
+ hash = HASH(hash, instr->sampler_index);
- tex1->sampler_index != tex2->sampler_index ||
- tex1->sampler_array_size != tex2->sampler_array_size) {
+
+ assert(!instr->sampler);
+
+ return hash;
+ }
+
+ /* Computes a hash of an instruction for use in a hash table. Note that this
+ * will only work for instructions where instr_can_rewrite() returns true, and
+ * it should return identical hashes for two instructions that are the same
+ * according to nir_instrs_equal().
+ */
+
+ static uint32_t
+ hash_instr(const void *data)
+ {
+ const nir_instr *instr = data;
+ uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ hash = hash_alu(hash, nir_instr_as_alu(instr));
+ break;
+ case nir_instr_type_load_const:
+ hash = hash_load_const(hash, nir_instr_as_load_const(instr));
+ break;
+ case nir_instr_type_phi:
+ hash = hash_phi(hash, nir_instr_as_phi(instr));
+ break;
+ case nir_instr_type_intrinsic:
+ hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_tex:
+ hash = hash_tex(hash, nir_instr_as_tex(instr));
+ break;
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return hash;
+ }
+
+ bool
+ nir_srcs_equal(nir_src src1, nir_src src2)
+ {
+ if (src1.is_ssa) {
+ if (src2.is_ssa) {
+ return src1.ssa == src2.ssa;
+ } else {
+ return false;
+ }
+ } else {
+ if (src2.is_ssa) {
+ return false;
+ } else {
+ if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL))
+ return false;
+
+ if (src1.reg.indirect) {
+ if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
+ return false;
+ }
+
+ return src1.reg.reg == src2.reg.reg &&
+ src1.reg.base_offset == src2.reg.base_offset;
+ }
+ }
+ }
+
+ static bool
+ nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
+ unsigned src1, unsigned src2)
+ {
+ if (alu1->src[src1].abs != alu2->src[src2].abs ||
+ alu1->src[src1].negate != alu2->src[src2].negate)
+ return false;
+
+ for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+ if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
+ return false;
+ }
+
+ return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
+ }
+
+ /* Returns "true" if two instructions are equal. Note that this will only
+ * work for the subset of instructions defined by instr_can_rewrite(). Also,
+ * it should only return "true" for instructions that hash_instr() will return
+ * the same hash for (ignoring collisions, of course).
+ */
+
+ static bool
+ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
+ {
+ if (instr1->type != instr2->type)
+ return false;
+
+ switch (instr1->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
+ nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
+
+ if (alu1->op != alu2->op)
+ return false;
+
+ /* TODO: We can probably actually do something more intelligent such
+ * as allowing different numbers of components and taking the maximum or something
+ * here */
+ if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
+ return false;
+
+ if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[alu1->op].num_inputs == 2);
+ return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+ (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 0));
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+ if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+ return false;
+ }
+ }
+ return true;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
+ nir_tex_instr *tex2 = nir_instr_as_tex(instr2);
+
+ if (tex1->op != tex2->op)
+ return false;
+
+ if (tex1->num_srcs != tex2->num_srcs)
+ return false;
+ for (unsigned i = 0; i < tex1->num_srcs; i++) {
+ if (tex1->src[i].src_type != tex2->src[i].src_type ||
+ !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
+ return false;
+ }
+ }
+
+ if (tex1->coord_components != tex2->coord_components ||
+ tex1->sampler_dim != tex2->sampler_dim ||
+ tex1->is_array != tex2->is_array ||
+ tex1->is_shadow != tex2->is_shadow ||
+ tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
+ memcmp(tex1->const_offset, tex2->const_offset,
+ sizeof(tex1->const_offset)) != 0 ||
+ tex1->component != tex2->component ||
- assert(!tex1->sampler && !tex2->sampler);
++ tex1->texture_index != tex2->texture_index ||
++ tex1->texture_array_size != tex2->texture_array_size ||
++ tex1->sampler_index != tex2->sampler_index) {
+ return false;
+ }
+
+ /* Don't support un-lowered sampler derefs currently. */
- if (tex->sampler)
++ assert(!tex1->texture && !tex1->sampler &&
++ !tex2->texture && !tex2->sampler);
+
+ return true;
+ }
+ case nir_instr_type_load_const: {
+ nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
+ nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
+
+ if (load1->def.num_components != load2->def.num_components)
+ return false;
+
+ return memcmp(load1->value.f, load2->value.f,
+ load1->def.num_components * sizeof(*load2->value.f)) == 0;
+ }
+ case nir_instr_type_phi: {
+ nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
+ nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
+
+ if (phi1->instr.block != phi2->instr.block)
+ return false;
+
+ nir_foreach_phi_src(phi1, src1) {
+ nir_foreach_phi_src(phi2, src2) {
+ if (src1->pred == src2->pred) {
+ if (!nir_srcs_equal(src1->src, src2->src))
+ return false;
+
+ break;
+ }
+ }
+ }
+
+ return true;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
+ nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[intrinsic1->intrinsic];
+
+ if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
+ intrinsic1->num_components != intrinsic2->num_components)
+ return false;
+
+ if (info->has_dest && intrinsic1->dest.ssa.num_components !=
+ intrinsic2->dest.ssa.num_components)
+ return false;
+
+ for (unsigned i = 0; i < info->num_srcs; i++) {
+ if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
+ return false;
+ }
+
+ assert(info->num_variables == 0);
+
+ for (unsigned i = 0; i < info->num_indices; i++) {
+ if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
+ return false;
+ }
+
+ return true;
+ }
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return false;
+ }
+
+ static bool
+ src_is_ssa(nir_src *src, void *data)
+ {
+ (void) data;
+ return src->is_ssa;
+ }
+
+ static bool
+ dest_is_ssa(nir_dest *dest, void *data)
+ {
+ (void) data;
+ return dest->is_ssa;
+ }
+
+ /* This function determines if uses of an instruction can safely be rewritten
+ * to use another identical instruction instead. Note that this function must
+ * be kept in sync with hash_instr() and nir_instrs_equal() -- only
+ * instructions that pass this test will be handed on to those functions, and
+ * conversely they must handle everything that this function returns true for.
+ */
+
+ static bool
+ instr_can_rewrite(nir_instr *instr)
+ {
+ /* We only handle SSA. */
+ if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
+ !nir_foreach_src(instr, src_is_ssa, NULL))
+ return false;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_load_const:
+ case nir_instr_type_phi:
+ return true;
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ /* Don't support un-lowered sampler derefs currently. */
++ if (tex->texture || tex->sampler)
+ return false;
+
+ return true;
+ }
+ case nir_instr_type_intrinsic: {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+ return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+ (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+ info->num_variables == 0; /* not implemented yet */
+ }
+ case nir_instr_type_call:
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ return false;
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Invalid instruction type");
+ }
+
+ return false;
+ }
+
+ static nir_ssa_def *
+ nir_instr_get_dest_ssa_def(nir_instr *instr)
+ {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
+ return &nir_instr_as_alu(instr)->dest.dest.ssa;
+ case nir_instr_type_load_const:
+ return &nir_instr_as_load_const(instr)->def;
+ case nir_instr_type_phi:
+ assert(nir_instr_as_phi(instr)->dest.is_ssa);
+ return &nir_instr_as_phi(instr)->dest.ssa;
+ case nir_instr_type_intrinsic:
+ assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
+ return &nir_instr_as_intrinsic(instr)->dest.ssa;
+ case nir_instr_type_tex:
+ assert(nir_instr_as_tex(instr)->dest.is_ssa);
+ return &nir_instr_as_tex(instr)->dest.ssa;
+ default:
+ unreachable("We never ask for any of these");
+ }
+ }
+
+ static bool
+ cmp_func(const void *data1, const void *data2)
+ {
+ return nir_instrs_equal(data1, data2);
+ }
+
+ struct set *
+ nir_instr_set_create(void *mem_ctx)
+ {
+ return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
+ }
+
+ void
+ nir_instr_set_destroy(struct set *instr_set)
+ {
+ _mesa_set_destroy(instr_set, NULL);
+ }
+
+ bool
+ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
+ {
+ if (!instr_can_rewrite(instr))
+ return false;
+
+ struct set_entry *entry = _mesa_set_search(instr_set, instr);
+ if (entry) {
+ nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
+ nir_ssa_def *new_def =
+ nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
+ return true;
+ }
+
+ _mesa_set_add(instr_set, instr);
+ return false;
+ }
+
+ void
+ nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
+ {
+ if (!instr_can_rewrite(instr))
+ return;
+
+ struct set_entry *entry = _mesa_set_search(instr_set, instr);
+ if (entry)
+ _mesa_set_remove(instr_set, entry);
+ }
+
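+ /* A minimal usage sketch (not part of this change): a CSE-style walk over a
+  * block could drive the set above as follows, using only the helpers defined
+  * in this file plus nir_foreach_instr_safe()/nir_instr_remove():
+  *
+  *    nir_foreach_instr_safe(block, instr) {
+  *       if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
+  *          // a matching instruction was already in the set and all uses
+  *          // of this one have been rewritten, so it can be dropped
+  *          nir_instr_remove(instr);
+  *       }
+  *    }
+  */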
--- /dev/null
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ /**
+ * This header file defines all the available intrinsics in one place. It
+ * expands to a list of macros of the form:
+ *
+ * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
+ * num_variables, num_indices, flags)
+ *
+ * Which should correspond one-to-one with the nir_intrinsic_info structure. It
+ * is included in both ir.h to create the nir_intrinsic enum (with members of
+ * the form nir_intrinsic_(name)) and in opcodes.c to create
+ * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures
+ * for each intrinsic.
+ */
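+ /* A rough sketch of how a consumer expands this list (the real definitions
+  * live in the headers that include this file; the file and macro spellings
+  * below are only illustrative assumptions):
+  *
+  *    #define INTRINSIC(name, ...) nir_intrinsic_##name,
+  *    #define LAST_INTRINSIC(name)
+  *    typedef enum {
+  *    #include "nir_intrinsics.h"
+  *       nir_num_intrinsics
+  *    } nir_intrinsic_op;
+  *    #undef INTRINSIC
+  *    #undef LAST_INTRINSIC
+  */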
+
+ #define ARR(...) { __VA_ARGS__ }
+
+
+ INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+ INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
+ INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+
+ /*
+ * Interpolation of input. The interp_var_at* intrinsics are similar to the
+ * load_var intrinsic acting on a shader input except that they interpolate
+ * the input differently. The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset
+ * respectively.
+ */
+
+ INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+ /*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+ /*
+ * a barrier is an intrinsic with no inputs/outputs but which can't be moved
+ * around/optimized in general
+ */
+ #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+
+ BARRIER(barrier)
+ BARRIER(discard)
+
+ /*
+ * Memory barrier with semantics analogous to the memoryBarrier() GLSL
+ * intrinsic.
+ */
+ BARRIER(memory_barrier)
+
+ /*
+ * Shader clock intrinsic with semantics analogous to the clock2x32ARB()
+ * GLSL intrinsic.
+ * The latter can be used as a code motion barrier, which is currently not
+ * feasible with NIR.
+ */
+ INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+
+ /*
+ * Memory barrier with semantics analogous to the compute shader
+ * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
+ * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
+ */
+ BARRIER(group_memory_barrier)
+ BARRIER(memory_barrier_atomic_counter)
+ BARRIER(memory_barrier_buffer)
+ BARRIER(memory_barrier_image)
+ BARRIER(memory_barrier_shared)
+
+ /** A conditional discard, with a single boolean source. */
+ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+
+ /**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
+ INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+
+ /**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+ INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
+ /*
+ * Atomic counters
+ *
+ * The *_var variants take an atomic_uint nir_variable, while the other,
+ * lowered, variants take a constant buffer index and register offset.
+ */
+
+ #define ATOMIC(name, flags) \
+ INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
+ INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+
+ ATOMIC(inc, 0)
+ ATOMIC(dec, 0)
+ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+
+ /*
+ * Image load, store and atomic intrinsics.
+ *
+ * All image intrinsics take an image target passed as a nir_variable. Image
+ * variables contain a number of memory and layout qualifiers that influence
+ * the semantics of the intrinsic.
+ *
+ * All image intrinsics take a four-coordinate vector and a sample index as
+ * first two sources, determining the location within the image that will be
+ * accessed by the intrinsic. Components not applicable to the image target
+ * in use are undefined. Image store takes an additional four-component
+ * argument with the value to be written, and image atomic operations take
+ * either one or two additional scalar arguments with the same meaning as in
+ * the ARB_shader_image_load_store specification.
+ */
+ INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE)
+ INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
+ INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
+ INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
++/*
++ * Vulkan descriptor set intrinsic
++ *
++ * The Vulkan API uses a different binding model from GL. In the Vulkan
++ * API, all external resources are represented by a triple:
++ *
++ * (descriptor set, binding, array index)
++ *
++ * where the array index is the only thing allowed to be indirect. The
++ * vulkan_resource_index intrinsic takes the descriptor set and binding as
++ * its first two indices and the array index as its source. The third
++ * index is a nir_variable_mode in case that's useful to the backend.
++ *
++ * The intended usage is that the shader will call vulkan_resource_index to
++ * get an index and then pass that as the buffer index to ubo/ssbo calls.
++ */
++INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3,
++ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
++
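++/* A rough, illustrative printed form of the intended usage for a UBO at
++ * (set = 0, binding = 2) indexed by a dynamic value ssa_i:
++ *
++ *    vec1 ssa_idx = intrinsic vulkan_resource_index (ssa_i) (0, 2, mode)
++ *    vec4 ssa_val = intrinsic load_ubo (ssa_idx, ssa_offset)
++ *
++ * The exact printed syntax is approximate and not meant to be authoritative.
++ */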
++/*
++ * variable atomic intrinsics
++ *
++ * All of these variable atomic memory operations read a value from memory,
++ * compute a new value using one of the operations below, write the new value
++ * to memory, and return the original value read.
++ *
++ * All operations take 1 source except CompSwap that takes 2. These sources
++ * represent:
++ *
++ * 0: The data parameter to the atomic function (i.e. the value to add
++ * in shared_atomic_add, etc).
++ * 1: For CompSwap only: the second data parameter.
++ *
++ * All operations take 1 variable deref.
++ */
++INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0)
++INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0)
++
+ /*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap that takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ * operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ * in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
+ INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+
+ /*
+ * CS shared variable atomic intrinsics
+ *
+ * All of the shared variable atomic memory operations read a value from
+ * memory, compute a new value using one of the operations below, write the
+ * new value to memory, and return the original value read.
+ *
+ * All operations take 2 sources except CompSwap that takes 3. These
+ * sources represent:
+ *
+ * 0: The offset into the shared variable storage region that the atomic
+ * operation will operate on.
+ * 1: The data parameter to the atomic function (i.e. the value to add
+ * in shared_atomic_add, etc).
+ * 2: For CompSwap only: the second data parameter.
+ */
+ INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+
+ #define SYSTEM_VALUE(name, components, num_indices) \
+ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+ SYSTEM_VALUE(front_face, 1, 0)
+ SYSTEM_VALUE(vertex_id, 1, 0)
+ SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+ SYSTEM_VALUE(base_vertex, 1, 0)
+ SYSTEM_VALUE(instance_id, 1, 0)
+ SYSTEM_VALUE(base_instance, 1, 0)
+ SYSTEM_VALUE(draw_id, 1, 0)
+ SYSTEM_VALUE(sample_id, 1, 0)
+ SYSTEM_VALUE(sample_pos, 2, 0)
+ SYSTEM_VALUE(sample_mask_in, 1, 0)
+ SYSTEM_VALUE(primitive_id, 1, 0)
+ SYSTEM_VALUE(invocation_id, 1, 0)
+ SYSTEM_VALUE(tess_coord, 3, 0)
+ SYSTEM_VALUE(tess_level_outer, 4, 0)
+ SYSTEM_VALUE(tess_level_inner, 2, 0)
+ SYSTEM_VALUE(patch_vertices_in, 1, 0)
+ SYSTEM_VALUE(local_invocation_id, 3, 0)
+ SYSTEM_VALUE(work_group_id, 3, 0)
+ SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+ SYSTEM_VALUE(num_work_groups, 3, 0)
+ SYSTEM_VALUE(helper_invocation, 1, 0)
+
+ /*
+ * Load operations pull data from some piece of GPU memory. All load
+ * operations operate in terms of offsets into some piece of theoretical
+ * memory. Loads from externally visible memory (UBO and SSBO) simply take a
+ * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.)
+ * take a base+offset pair where the base (const_index[0]) gives the location
+ * of the start of the variable being loaded and the offset source is an
+ * offset into that variable.
+ *
++ * Uniform load operations have a second index that specifies the size of the
++ * variable being loaded. If const_index[1] == 0, then the size is unknown.
++ *
+ * Some load operations such as UBO/SSBO load and per_vertex loads take an
+ * additional source to specify which UBO/SSBO/vertex to load from.
+ *
+ * The exact address type depends on the lowering pass that generates the
+ * load/store intrinsics. Typically, this is vec4 units for things such as
+ * varying slots and float units for fragment shader inputs. UBO and SSBO
+ * offsets are always in bytes.
+ */
+
+ #define LOAD(name, srcs, indices, flags) \
+ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
+
++/* src[] = { offset }. const_index[] = { base, size } */
++LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { buffer_index, offset }. No const_index */
+ LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { offset }. const_index[] = { base } */
+ LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { vertex, offset }. const_index[] = { base } */
+ LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /* src[] = { buffer_index, offset }. No const_index */
+ LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+ /* src[] = { offset }. const_index[] = { base } */
+ LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+ /* src[] = { vertex, offset }. const_index[] = { base } */
+ LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+ /* src[] = { offset }. const_index[] = { base } */
+ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
++/* src[] = { offset }. const_index[] = { base, size } */
++LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
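+ /* As an illustration (the printed form is approximate), a uniform whose
+  * driver_location is 4, loaded with a dynamic offset ssa_off, becomes:
+  *
+  *    vec1 ssa_val = intrinsic load_uniform (ssa_off) (4, size)
+  *
+  * where "size" is the size of the whole variable being loaded, as described
+  * above.
+  */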
+ /*
+ * Stores work the same way as loads, except now the first source is the value
+ * to store and the second (and possibly third) source specifies where to store
+ * the value. SSBO and shared memory stores also have a write mask as
+ * const_index[0].
+ */
+
+ #define STORE(name, srcs, indices, flags) \
+ INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
+
+ /* src[] = { value, offset }. const_index[] = { base, write_mask } */
+ STORE(output, 2, 2, 0)
+ /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
+ STORE(per_vertex_output, 3, 2, 0)
+ /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
+ STORE(ssbo, 3, 1, 0)
+ /* src[] = { value, offset }. const_index[] = { base, write_mask } */
+ STORE(shared, 2, 2, 0)
+
+ LAST_INTRINSIC(store_shared)
--- /dev/null
- case nir_op_unpack_half_2x16:
- /* We could split this into unpack_half_2x16_split_[xy], but should
- * we?
- */
+ /*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+
+ /** @file nir_lower_alu_to_scalar.c
+ *
+ * Replaces nir_alu_instr operations with more than one channel used in the
+ * arguments with individual per-channel operations.
+ */
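+ /* For example (approximate printed form), a two-component add
+  *
+  *    vec2 ssa_2 = fadd ssa_0, ssa_1
+  *
+  * is rewritten into per-channel adds whose results are recombined with a
+  * vec2 op:
+  *
+  *    vec1 ssa_3 = fadd ssa_0.x, ssa_1.x
+  *    vec1 ssa_4 = fadd ssa_0.y, ssa_1.y
+  *    vec2 ssa_5 = vec2 ssa_3, ssa_4
+  */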
+
+ static void
+ nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+ {
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ instr->dest.write_mask = (1 << num_components) - 1;
+ }
+
+ static void
+ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
+ nir_builder *builder)
+ {
+ unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
+
+ nir_ssa_def *last = NULL;
+ for (unsigned i = 0; i < num_components; i++) {
+ nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
+ nir_alu_ssa_dest_init(chan, 1);
+ nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
+ chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
+ if (nir_op_infos[chan_op].num_inputs > 1) {
+ assert(nir_op_infos[chan_op].num_inputs == 2);
+ nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
+ chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
+ }
+
+ nir_builder_instr_insert(builder, &chan->instr);
+
+ if (i == 0) {
+ last = &chan->dest.dest.ssa;
+ } else {
+ last = nir_build_alu(builder, merge_op,
+ last, &chan->dest.dest.ssa, NULL, NULL);
+ }
+ }
+
+ assert(instr->dest.write_mask == 1);
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last));
+ nir_instr_remove(&instr->instr);
+ }
+
+ static void
+ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
+ {
+ unsigned num_src = nir_op_infos[instr->op].num_inputs;
+ unsigned i, chan;
+
+ assert(instr->dest.dest.is_ssa);
+ assert(instr->dest.write_mask != 0);
+
+ b->cursor = nir_before_instr(&instr->instr);
+
+ #define LOWER_REDUCTION(name, chan, merge) \
+ case name##2: \
+ case name##3: \
+ case name##4: \
+ lower_reduction(instr, chan, merge, b); \
+ return;
+
+ switch (instr->op) {
+ case nir_op_vec4:
+ case nir_op_vec3:
+ case nir_op_vec2:
+ /* We don't need to scalarize these ops, they're the ones generated to
+ * group up outputs into a value that can be SSAed.
+ */
+ return;
+
++ case nir_op_pack_half_2x16:
++ if (!b->shader->options->lower_pack_half_2x16)
++ return;
++
++ nir_ssa_def *val =
++ nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa,
++ instr->src[0].swizzle[0]),
++ nir_channel(b, instr->src[0].src.ssa,
++ instr->src[0].swizzle[1]));
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
++ nir_instr_remove(&instr->instr);
++ return;
++
+ case nir_op_unpack_unorm_4x8:
+ case nir_op_unpack_snorm_4x8:
+ case nir_op_unpack_unorm_2x16:
+ case nir_op_unpack_snorm_2x16:
+ /* There is no scalar version of these ops, unless we were to break it
+ * down to bitshifts and math (which is definitely not intended).
+ */
+ return;
+
++ case nir_op_unpack_half_2x16: {
++ if (!b->shader->options->lower_unpack_half_2x16)
++ return;
++
++ nir_ssa_def *comps[2];
++ comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa);
++ comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa);
++ nir_ssa_def *vec = nir_vec(b, comps, 2);
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
++ nir_instr_remove(&instr->instr);
+ return;
++ }
++
++ case nir_op_pack_uvec2_to_uint: {
++ assert(b->shader->options->lower_pack_snorm_2x16 ||
++ b->shader->options->lower_pack_unorm_2x16);
++
++ nir_ssa_def *word =
++ nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
++ nir_ssa_def *val =
++ nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
++ nir_channel(b, word, 0));
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
++ nir_instr_remove(&instr->instr);
++ break;
++ }
++
++ case nir_op_pack_uvec4_to_uint: {
++ assert(b->shader->options->lower_pack_snorm_4x8 ||
++ b->shader->options->lower_pack_unorm_4x8);
++
++ nir_ssa_def *byte =
++ nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
++ nir_ssa_def *val =
++ nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
++ nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
++ nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)),
++ nir_channel(b, byte, 0)));
++
++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
++ nir_instr_remove(&instr->instr);
++ break;
++ }
+
+ case nir_op_fdph: {
+ nir_ssa_def *sum[4];
+ for (unsigned i = 0; i < 3; i++) {
+ sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
+ instr->src[0].swizzle[i]),
+ nir_channel(b, instr->src[1].src.ssa,
+ instr->src[1].swizzle[i]));
+ }
+ sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+
+ nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
+ nir_fadd(b, sum[2], sum[3]));
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+ nir_instr_remove(&instr->instr);
+ return;
+ }
+
+ LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
+ LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
+ LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
+ LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
+ LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
+ LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
+ LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
+
+ default:
+ break;
+ }
+
+ if (instr->dest.dest.ssa.num_components == 1)
+ return;
+
+ unsigned num_components = instr->dest.dest.ssa.num_components;
+ nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL };
+
+ for (chan = 0; chan < 4; chan++) {
+ if (!(instr->dest.write_mask & (1 << chan)))
+ continue;
+
+ nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op);
+ for (i = 0; i < num_src; i++) {
+ /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
+ * args (input_sizes[] == 1).
+ */
+ assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+ unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
+ 0 : chan);
+
+ nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
+ for (int j = 0; j < 4; j++)
+ lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
+ }
+
+ nir_alu_ssa_dest_init(lower, 1);
+ lower->dest.saturate = instr->dest.saturate;
+ comps[chan] = &lower->dest.dest.ssa;
+
+ nir_builder_instr_insert(b, &lower->instr);
+ }
+
+ nir_ssa_def *vec = nir_vec(b, comps, num_components);
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
+
+ nir_instr_remove(&instr->instr);
+ }
+
+ static bool
+ lower_alu_to_scalar_block(nir_block *block, void *builder)
+ {
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_alu)
+ lower_alu_instr_scalar(nir_instr_as_alu(instr), builder);
+ }
+
+ return true;
+ }
+
+ static void
+ nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
+ {
+ nir_builder builder;
+ nir_builder_init(&builder, impl);
+
+ nir_foreach_block(impl, lower_alu_to_scalar_block, &builder);
+ }
+
+ void
+ nir_lower_alu_to_scalar(nir_shader *shader)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_lower_alu_to_scalar_impl(function->impl);
+ }
+ }
--- /dev/null
- instr->variables[0]->var->data.mode != nir_var_shader_storage)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "compiler/glsl/ir_uniform.h"
+ #include "nir.h"
+ #include "main/config.h"
+ #include <assert.h>
+
+ typedef struct {
+ const struct gl_shader_program *shader_program;
+ nir_shader *shader;
+ } lower_atomic_state;
+
+ /*
+ * replace atomic counter intrinsics that use a variable with intrinsics
+ * that directly store the buffer index and byte offset
+ */
+
+ static void
+ lower_instr(nir_intrinsic_instr *instr,
+ lower_atomic_state *state)
+ {
+ nir_intrinsic_op op;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_atomic_counter_read_var:
+ op = nir_intrinsic_atomic_counter_read;
+ break;
+
+ case nir_intrinsic_atomic_counter_inc_var:
+ op = nir_intrinsic_atomic_counter_inc;
+ break;
+
+ case nir_intrinsic_atomic_counter_dec_var:
+ op = nir_intrinsic_atomic_counter_dec;
+ break;
+
+ default:
+ return;
+ }
+
+ if (instr->variables[0]->var->data.mode != nir_var_uniform &&
++ instr->variables[0]->var->data.mode != nir_var_shader_storage &&
++ instr->variables[0]->var->data.mode != nir_var_shared)
+ return; /* atomics passed as function arguments can't be lowered */
+
+ void *mem_ctx = ralloc_parent(instr);
+ unsigned uniform_loc = instr->variables[0]->var->data.location;
+
+ nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+ new_instr->const_index[0] =
+ state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
+
+ nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+ offset_const->value.u[0] = instr->variables[0]->var->data.offset;
+
+ nir_instr_insert_before(&instr->instr, &offset_const->instr);
+
+ nir_ssa_def *offset_def = &offset_const->def;
+
+ nir_deref *tail = &instr->variables[0]->deref;
+ while (tail->child != NULL) {
+ assert(tail->child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+ tail = tail->child;
+
+ unsigned child_array_elements = tail->child != NULL ?
+ glsl_get_aoa_size(tail->type) : 1;
+
+ offset_const->value.u[0] += deref_array->base_offset *
+ child_array_elements * ATOMIC_COUNTER_SIZE;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_load_const_instr *atomic_counter_size =
+ nir_load_const_instr_create(mem_ctx, 1);
+ atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
+ nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
+
+ nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ mul->dest.write_mask = 0x1;
+ nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
+ mul->src[1].src.is_ssa = true;
+ mul->src[1].src.ssa = &atomic_counter_size->def;
+ nir_instr_insert_before(&instr->instr, &mul->instr);
+
+ nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ add->dest.write_mask = 0x1;
+ add->src[0].src.is_ssa = true;
+ add->src[0].src.ssa = &mul->dest.dest.ssa;
+ add->src[1].src.is_ssa = true;
+ add->src[1].src.ssa = offset_def;
+ nir_instr_insert_before(&instr->instr, &add->instr);
+
+ offset_def = &add->dest.dest.ssa;
+ }
+ }
+
+ new_instr->src[0].is_ssa = true;
+ new_instr->src[0].ssa = offset_def;
+
+ if (instr->dest.is_ssa) {
+ nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
+ instr->dest.ssa.num_components, NULL);
+ nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+ nir_src_for_ssa(&new_instr->dest.ssa));
+ } else {
+ nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
+ }
+
+ nir_instr_insert_before(&instr->instr, &new_instr->instr);
+ nir_instr_remove(&instr->instr);
+ }
+
+ static bool
+ lower_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_intrinsic)
+ lower_instr(nir_instr_as_intrinsic(instr),
+ (lower_atomic_state *) state);
+ }
+
+ return true;
+ }
+
+ void
+ nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
+ {
+ lower_atomic_state state = {
+ .shader = shader,
+ .shader_program = shader_program,
+ };
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_foreach_block(function->impl, lower_block, (void *) &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+ }
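+ /* For instance (illustrative only), with
+  *
+  *    layout(binding = 0, offset = 4) uniform atomic_uint c;
+  *
+  * an atomic_counter_inc_var intrinsic on c is rewritten to roughly
+  *
+  *    vec1 ssa_0 = load_const (0x00000004)
+  *    vec1 ssa_1 = intrinsic atomic_counter_inc (ssa_0) (buf)
+  *
+  * where "buf" is the counter buffer index looked up in UniformStorage and
+  * ssa_0 carries the byte offset, extended with imul/iadd chains when the
+  * deref contains indirect array indexing.
+  */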
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_builder.h"
++
++static void
++emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
++ nir_deref_var *deref, nir_deref *tail,
++ nir_ssa_def **dest, nir_ssa_def *src);
++
++static void
++emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
++ nir_deref_var *deref, nir_deref *arr_parent,
++ int start, int end,
++ nir_ssa_def **dest, nir_ssa_def *src)
++{
++ assert(arr_parent->child &&
++ arr_parent->child->deref_type == nir_deref_type_array);
++ nir_deref_array *arr = nir_deref_as_array(arr_parent->child);
++ assert(arr->deref_array_type == nir_deref_array_type_indirect);
++ assert(arr->indirect.is_ssa);
++
++ assert(start < end);
++ if (start == end - 1) {
++ /* Base case. Just emit the load/store op */
++ nir_deref_array direct = *arr;
++ direct.deref_array_type = nir_deref_array_type_direct;
++ direct.base_offset += start;
++ direct.indirect = NIR_SRC_INIT;
++
++ arr_parent->child = &direct.deref;
++ emit_load_store(b, orig_instr, deref, &arr->deref, dest, src);
++ arr_parent->child = &arr->deref;
++ } else {
++ int mid = start + (end - start) / 2;
++
++ nir_ssa_def *then_dest, *else_dest;
++
++ nir_if *if_stmt = nir_if_create(b->shader);
++ if_stmt->condition = nir_src_for_ssa(nir_ilt(b, arr->indirect.ssa,
++ nir_imm_int(b, mid)));
++ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
++
++ b->cursor = nir_after_cf_list(&if_stmt->then_list);
++ emit_indirect_load_store(b, orig_instr, deref, arr_parent,
++ start, mid, &then_dest, src);
++
++ b->cursor = nir_after_cf_list(&if_stmt->else_list);
++ emit_indirect_load_store(b, orig_instr, deref, arr_parent,
++ mid, end, &else_dest, src);
++
++ b->cursor = nir_after_cf_node(&if_stmt->cf_node);
++
++ if (src == NULL) {
++ /* We're a load. We need to insert a phi node */
++ nir_phi_instr *phi = nir_phi_instr_create(b->shader);
++ nir_ssa_dest_init(&phi->instr, &phi->dest,
++ then_dest->num_components, NULL);
++
++ nir_phi_src *src0 = ralloc(phi, nir_phi_src);
++ src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt));
++ src0->src = nir_src_for_ssa(then_dest);
++ exec_list_push_tail(&phi->srcs, &src0->node);
++
++ nir_phi_src *src1 = ralloc(phi, nir_phi_src);
++ src1->pred = nir_cf_node_as_block(nir_if_last_else_node(if_stmt));
++ src1->src = nir_src_for_ssa(else_dest);
++ exec_list_push_tail(&phi->srcs, &src1->node);
++
++ nir_builder_instr_insert(b, &phi->instr);
++ *dest = &phi->dest.ssa;
++ }
++ }
++}
++
++static void
++emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
++ nir_deref_var *deref, nir_deref *tail,
++ nir_ssa_def **dest, nir_ssa_def *src)
++{
++ for (; tail->child; tail = tail->child) {
++ if (tail->child->deref_type != nir_deref_type_array)
++ continue;
++
++ nir_deref_array *arr = nir_deref_as_array(tail->child);
++ if (arr->deref_array_type != nir_deref_array_type_indirect)
++ continue;
++
++ int length = glsl_get_length(tail->type);
++
++ emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset,
++ length - arr->base_offset, dest, src);
++ return;
++ }
++
++ assert(tail && tail->child == NULL);
++
++ /* We reached the end of the deref chain. Emit the instruction */
++
++ if (src == NULL) {
++ /* This is a load instruction */
++ nir_intrinsic_instr *load =
++ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
++ load->num_components = orig_instr->num_components;
++ load->variables[0] =
++ nir_deref_as_var(nir_copy_deref(load, &deref->deref));
++ nir_ssa_dest_init(&load->instr, &load->dest,
++ load->num_components, NULL);
++ nir_builder_instr_insert(b, &load->instr);
++ *dest = &load->dest.ssa;
++ } else {
++ /* This is a store instruction */
++ nir_intrinsic_instr *store =
++ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
++ store->num_components = orig_instr->num_components;
++ store->const_index[0] = orig_instr->const_index[0]; /* writemask */
++ store->variables[0] =
++ nir_deref_as_var(nir_copy_deref(store, &deref->deref));
++ store->src[0] = nir_src_for_ssa(src);
++ nir_builder_instr_insert(b, &store->instr);
++ }
++}
++
++static bool
++deref_has_indirect(nir_deref_var *deref)
++{
++ for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
++ if (tail->deref_type != nir_deref_type_array)
++ continue;
++
++ nir_deref_array *arr = nir_deref_as_array(tail);
++ if (arr->deref_array_type == nir_deref_array_type_indirect)
++ return true;
++ }
++
++ return false;
++}
++
++struct lower_indirect_state {
++ nir_builder builder;
++ uint32_t mode_mask;
++ bool progress;
++};
++
++static bool
++lower_indirect_block(nir_block *block, void *void_state)
++{
++ struct lower_indirect_state *state = void_state;
++
++ nir_foreach_instr_safe(block, instr) {
++ if (instr->type != nir_instr_type_intrinsic)
++ continue;
++
++ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
++ if (intrin->intrinsic != nir_intrinsic_load_var &&
++ intrin->intrinsic != nir_intrinsic_store_var)
++ continue;
++
++ if (!deref_has_indirect(intrin->variables[0]))
++ continue;
++
++ /* Only lower variables whose mode is in the mask */
++ if (!(state->mode_mask & (1 << intrin->variables[0]->var->data.mode)))
++ continue;
++
++ state->builder.cursor = nir_before_instr(&intrin->instr);
++
++ if (intrin->intrinsic == nir_intrinsic_load_var) {
++ nir_ssa_def *result;
++ emit_load_store(&state->builder, intrin, intrin->variables[0],
++ &intrin->variables[0]->deref, &result, NULL);
++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result));
++ } else {
++ assert(intrin->src[0].is_ssa);
++ emit_load_store(&state->builder, intrin, intrin->variables[0],
++ &intrin->variables[0]->deref, NULL, intrin->src[0].ssa);
++ }
++ nir_instr_remove(&intrin->instr);
++ state->progress = true;
++ }
++
++ return true;
++}
++
++static bool
++lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask)
++{
++ struct lower_indirect_state state;
++
++ state.progress = false;
++ state.mode_mask = mode_mask;
++ nir_builder_init(&state.builder, impl);
++
++ nir_foreach_block(impl, lower_indirect_block, &state);
++
++ if (state.progress)
++ nir_metadata_preserve(impl, nir_metadata_none);
++
++ return state.progress;
++}
++
++/** Lowers indirect variable loads/stores to direct loads/stores.
++ *
++ * The pass works by replacing any indirect load or store with an if-ladder
++ * that does a binary search on the array index.
++ */
++bool
++nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask)
++{
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = lower_indirects_impl(function->impl, mode_mask) || progress;
++ }
++
++ return progress;
++}
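++/* As a rough example, an indirect load "x = arr[i]" on a four-element array
++ * becomes a binary search over i:
++ *
++ *    if (i < 2) {
++ *       if (i < 1) x = arr[0]; else x = arr[1];
++ *    } else {
++ *       if (i < 3) x = arr[2]; else x = arr[3];
++ *    }
++ *
++ * with phi nodes joining the loaded values after each if.
++ */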
--- /dev/null
- if (intrin->intrinsic != nir_intrinsic_load_var &&
- intrin->intrinsic != nir_intrinsic_store_var)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+ /*
+ * This lowering pass converts references to input/output variables with
+ * loads/stores to actual input/output intrinsics.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+
+ struct lower_io_state {
+ nir_builder builder;
+ void *mem_ctx;
+ int (*type_size)(const struct glsl_type *type);
+ nir_variable_mode mode;
+ };
+
+ void
+ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+ int (*type_size)(const struct glsl_type *))
+ {
+ unsigned location = 0;
+
+ nir_foreach_variable(var, var_list) {
+ /*
+ * UBOs have their own address spaces, so don't count them towards the
+ * number of global uniforms
+ */
+ if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
+ var->interface_type != NULL)
+ continue;
+
+ var->data.driver_location = location;
+ location += type_size(var->type);
+ }
+
+ *size = location;
+ }
+
+ /**
+ * Returns true if we're processing a stage whose inputs are arrays indexed
+ * by a vertex number (such as geometry shader inputs).
+ */
+ static bool
+ is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
+ {
+ gl_shader_stage stage = state->builder.shader->stage;
+
+ return var->data.mode == nir_var_shader_in && !var->data.patch &&
+ (stage == MESA_SHADER_TESS_CTRL ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY);
+ }
+
+ static bool
+ is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
+ {
+ gl_shader_stage stage = state->builder.shader->stage;
+ return var->data.mode == nir_var_shader_out && !var->data.patch &&
+ stage == MESA_SHADER_TESS_CTRL;
+ }
+
+ static nir_ssa_def *
+ get_io_offset(nir_builder *b, nir_deref_var *deref,
+ nir_ssa_def **vertex_index,
+ int (*type_size)(const struct glsl_type *))
+ {
+ nir_deref *tail = &deref->deref;
+
+ /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
+ * outermost array index separate. Process the rest normally.
+ */
+ if (vertex_index != NULL) {
+ tail = tail->child;
+ assert(tail->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+ nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
+ }
+ *vertex_index = vtx;
+ }
+
+ /* Just emit code and let constant-folding go to town */
+ nir_ssa_def *offset = nir_imm_int(b, 0);
+
+ while (tail->child != NULL) {
+ const struct glsl_type *parent_type = tail->type;
+ tail = tail->child;
+
+ if (tail->deref_type == nir_deref_type_array) {
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+ unsigned size = type_size(tail->type);
+
+ offset = nir_iadd(b, offset,
+ nir_imm_int(b, size * deref_array->base_offset));
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, size),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ offset = nir_iadd(b, offset, mul);
+ }
+ } else if (tail->deref_type == nir_deref_type_struct) {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+ unsigned field_offset = 0;
+ for (unsigned i = 0; i < deref_struct->index; i++) {
+ field_offset += type_size(glsl_get_struct_field(parent_type, i));
+ }
+ offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
+ }
+ }
+
+ return offset;
+ }
+
+ static nir_intrinsic_op
+ load_op(struct lower_io_state *state,
+ nir_variable_mode mode, bool per_vertex)
+ {
+ nir_intrinsic_op op;
+ switch (mode) {
+ case nir_var_shader_in:
+ op = per_vertex ? nir_intrinsic_load_per_vertex_input :
+ nir_intrinsic_load_input;
+ break;
+ case nir_var_shader_out:
+ op = per_vertex ? nir_intrinsic_load_per_vertex_output :
+ nir_intrinsic_load_output;
+ break;
+ case nir_var_uniform:
+ op = nir_intrinsic_load_uniform;
+ break;
++ case nir_var_shared:
++ op = nir_intrinsic_load_shared;
++ break;
+ default:
+ unreachable("Unknown variable mode");
+ }
+ return op;
+ }
+
++static nir_intrinsic_op
++store_op(struct lower_io_state *state,
++ nir_variable_mode mode, bool per_vertex)
++{
++ nir_intrinsic_op op;
++ switch (mode) {
++ case nir_var_shader_in:
++ case nir_var_shader_out:
++ op = per_vertex ? nir_intrinsic_store_per_vertex_output :
++ nir_intrinsic_store_output;
++ break;
++ case nir_var_shared:
++ op = nir_intrinsic_store_shared;
++ break;
++ default:
++ unreachable("Unknown variable mode");
++ }
++ return op;
++}
++
++static nir_intrinsic_op
++atomic_op(nir_intrinsic_op opcode)
++{
++ switch (opcode) {
++#define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O;
++ OP(atomic_exchange)
++ OP(atomic_comp_swap)
++ OP(atomic_add)
++ OP(atomic_imin)
++ OP(atomic_umin)
++ OP(atomic_imax)
++ OP(atomic_umax)
++ OP(atomic_and)
++ OP(atomic_or)
++ OP(atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid atomic");
++ }
++}
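++/* Illustrative example: a var_atomic_add on a compute-shader shared variable
++ * is turned by nir_lower_io_block() below into roughly
++ *
++ *    vec1 ssa_r = intrinsic shared_atomic_add (ssa_offset, ssa_data)
++ *
++ * where ssa_offset comes from get_io_offset() over the deref chain and the
++ * variable's driver_location is written to const_index[0].
++ */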
++
+ static bool
+ nir_lower_io_block(nir_block *block, void *void_state)
+ {
+ struct lower_io_state *state = void_state;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
- assert(mode == nir_var_shader_out);
++ switch (intrin->intrinsic) {
++ case nir_intrinsic_load_var:
++ case nir_intrinsic_store_var:
++ case nir_intrinsic_var_atomic_add:
++ case nir_intrinsic_var_atomic_imin:
++ case nir_intrinsic_var_atomic_umin:
++ case nir_intrinsic_var_atomic_imax:
++ case nir_intrinsic_var_atomic_umax:
++ case nir_intrinsic_var_atomic_and:
++ case nir_intrinsic_var_atomic_or:
++ case nir_intrinsic_var_atomic_xor:
++ case nir_intrinsic_var_atomic_exchange:
++ case nir_intrinsic_var_atomic_comp_swap:
++ /* We can lower the io for this nir intrinsic */
++ break;
++ default:
++ /* We can't lower the io for this nir intrinsic, so skip it */
+ continue;
++ }
+
+ nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+
+ if (state->mode != nir_var_all && state->mode != mode)
+ continue;
+
+ if (mode != nir_var_shader_in &&
+ mode != nir_var_shader_out &&
++ mode != nir_var_shared &&
+ mode != nir_var_uniform)
+ continue;
+
+ b->cursor = nir_before_instr(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ bool per_vertex =
+ is_per_vertex_input(state, intrin->variables[0]->var) ||
+ is_per_vertex_output(state, intrin->variables[0]->var);
+
+ nir_ssa_def *offset;
+ nir_ssa_def *vertex_index;
+
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(state->mem_ctx,
+ load_op(state, mode, per_vertex));
+ load->num_components = intrin->num_components;
+
+ load->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
++ if (load->intrinsic == nir_intrinsic_load_uniform) {
++ load->const_index[1] =
++ state->type_size(intrin->variables[0]->var->type);
++ }
++
+ if (per_vertex)
+ load->src[0] = nir_src_for_ssa(vertex_index);
+
+ load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset);
+
+ if (intrin->dest.is_ssa) {
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->num_components, NULL);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ } else {
+ nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
+ }
+
+ nir_instr_insert_before(&intrin->instr, &load->instr);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
+
+ case nir_intrinsic_store_var: {
- nir_intrinsic_op store_op =
- per_vertex ? nir_intrinsic_store_per_vertex_output :
- nir_intrinsic_store_output;
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
- store_op);
++ assert(mode == nir_var_shader_out || mode == nir_var_shared);
+
+ nir_ssa_def *offset;
+ nir_ssa_def *vertex_index;
+
+ bool per_vertex =
+ is_per_vertex_output(state, intrin->variables[0]->var);
+
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
+
++ nir_intrinsic_instr *store =
++ nir_intrinsic_instr_create(state->mem_ctx,
++ store_op(state, mode, per_vertex));
+ store->num_components = intrin->num_components;
+
+ nir_src_copy(&store->src[0], &intrin->src[0], store);
+
+ store->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
+ /* Copy the writemask */
+ store->const_index[1] = intrin->const_index[0];
+
+ if (per_vertex)
+ store->src[1] = nir_src_for_ssa(vertex_index);
+
+ store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
+
+ nir_instr_insert_before(&intrin->instr, &store->instr);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
+
++ case nir_intrinsic_var_atomic_add:
++ case nir_intrinsic_var_atomic_imin:
++ case nir_intrinsic_var_atomic_umin:
++ case nir_intrinsic_var_atomic_imax:
++ case nir_intrinsic_var_atomic_umax:
++ case nir_intrinsic_var_atomic_and:
++ case nir_intrinsic_var_atomic_or:
++ case nir_intrinsic_var_atomic_xor:
++ case nir_intrinsic_var_atomic_exchange:
++ case nir_intrinsic_var_atomic_comp_swap: {
++ assert(mode == nir_var_shared);
++
++ nir_ssa_def *offset;
++
++ offset = get_io_offset(b, intrin->variables[0],
++ NULL, state->type_size);
++
++ nir_intrinsic_instr *atomic =
++ nir_intrinsic_instr_create(state->mem_ctx,
++ atomic_op(intrin->intrinsic));
++
++ atomic->src[0] = nir_src_for_ssa(offset);
++
++ atomic->const_index[0] =
++ intrin->variables[0]->var->data.driver_location;
++
++ nir_src_copy(&atomic->src[1], &intrin->src[0], atomic);
++
++ if (intrin->intrinsic == nir_intrinsic_var_atomic_comp_swap)
++ nir_src_copy(&atomic->src[2], &intrin->src[1], atomic);
++
++ if (intrin->dest.is_ssa) {
++ nir_ssa_dest_init(&atomic->instr, &atomic->dest,
++ intrin->dest.ssa.num_components, NULL);
++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
++ nir_src_for_ssa(&atomic->dest.ssa));
++ } else {
++ nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx);
++ }
++
++ nir_instr_insert_before(&intrin->instr, &atomic->instr);
++ nir_instr_remove(&intrin->instr);
++ break;
++ }
++
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ nir_lower_io_impl(nir_function_impl *impl,
+ nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *))
+ {
+ struct lower_io_state state;
+
+ nir_builder_init(&state.builder, impl);
+ state.mem_ctx = ralloc_parent(impl);
+ state.mode = mode;
+ state.type_size = type_size;
+
+ nir_foreach_block(impl, nir_lower_io_block, &state);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ void
+ nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *))
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_lower_io_impl(function->impl, mode, type_size);
+ }
+ }
+
+ /**
+ * Return the offset source for a load/store intrinsic.
+ */
+ nir_src *
+ nir_get_io_offset_src(nir_intrinsic_instr *instr)
+ {
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_uniform:
+ return &instr->src[0];
++ case nir_intrinsic_load_ubo:
++ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_store_output:
+ return &instr->src[1];
++ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_store_per_vertex_output:
+ return &instr->src[2];
+ default:
+ return NULL;
+ }
+ }
+
+ /**
+ * Return the vertex index source for a load/store per_vertex intrinsic.
+ */
+ nir_src *
+ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
+ {
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ return &instr->src[0];
+ case nir_intrinsic_store_per_vertex_output:
+ return &instr->src[1];
+ default:
+ return NULL;
+ }
+ }
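+
+ /* A minimal usage sketch (illustration only): after nir_lower_io has run, a
+ * backend can recover the relevant sources of a lowered intrinsic without
+ * switching on the opcode itself:
+ *
+ * nir_src *offset = nir_get_io_offset_src(intrin);
+ * nir_src *vertex_index = nir_get_io_vertex_index_src(intrin);
+ *
+ * Either pointer is NULL when the intrinsic has no such source; for the
+ * lowered input/output intrinsics the base driver_location is carried in
+ * const_index[0].
+ */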
--- /dev/null
-nir_lower_outputs_to_temporaries(nir_shader *shader)
+ /*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ /*
+ * Implements a pass that lowers output variables to a temporary plus an
+ * output variable with a single copy at each exit point of the shader.
+ * This way the output variable is only ever written.
+ *
+ * Because valid NIR requires that output variables are never read, this
+ * pass is more of a helper for NIR producers and must be run before the
+ * shader is ever validated.
+ */
+
+ #include "nir.h"
+
+ struct lower_outputs_state {
+ nir_shader *shader;
+ struct exec_list old_outputs;
+ };
+
+ static void
+ emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
+ {
+ assert(exec_list_length(&state->shader->outputs) ==
+ exec_list_length(&state->old_outputs));
+
+ foreach_two_lists(out_node, &state->shader->outputs,
+ temp_node, &state->old_outputs) {
+ nir_variable *output = exec_node_data(nir_variable, out_node, node);
+ nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
+
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
+ copy->variables[0] = nir_deref_var_create(copy, output);
+ copy->variables[1] = nir_deref_var_create(copy, temp);
+
+ nir_instr_insert(cursor, &copy->instr);
+ }
+ }
+
+ static bool
+ emit_output_copies_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+ emit_output_copies(nir_before_instr(&intrin->instr), state);
+ }
+
+ return true;
+ }
+
+ void
- } else if (strcmp(function->name, "main") == 0) {
++nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint)
+ {
+ struct lower_outputs_state state;
+
+ if (shader->stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ state.shader = shader;
+ exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
+
+ /* Walk over all of the outputs, turn each output into a temporary, and
+ * make a new variable for the actual output.
+ */
+ nir_foreach_variable(var, &state.old_outputs) {
+ nir_variable *output = ralloc(shader, nir_variable);
+ memcpy(output, var, sizeof *output);
+
+ /* The original is now the temporary */
+ nir_variable *temp = var;
+
+ /* Reparent the name to the new variable */
+ ralloc_steal(output, output->name);
+
++ /* Reparent the constant initializer (if any) */
++ ralloc_steal(output, output->constant_initializer);
++
+ /* Give the temporary a new name with @out-temp appended */
+ temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
+ temp->data.mode = nir_var_global;
+ temp->constant_initializer = NULL;
+
+ exec_list_push_tail(&shader->outputs, &output->node);
+ }
+
+ nir_foreach_function(shader, function) {
+ if (function->impl == NULL)
+ continue;
+
+ if (shader->stage == MESA_SHADER_GEOMETRY) {
+ /* For geometry shaders, we have to emit the output copies right
+ * before each EmitVertex call.
+ */
+ nir_foreach_block(function->impl, emit_output_copies_block, &state);
++ } else if (function == entrypoint) {
+ /* For all other shader types, we need to do the copies right before
+ * the jumps to the end block.
+ */
+ struct set_entry *block_entry;
+ set_foreach(function->impl->end_block->predecessors, block_entry) {
+ struct nir_block *block = (void *)block_entry->key;
+ emit_output_copies(nir_after_block_before_jump(block), &state);
+ }
+ }
+
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ exec_list_append(&shader->globals, &state.old_outputs);
+ }
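+
+ /* Usage sketch (illustration): NIR producers would typically run this pass
+ * right after building the shader and before it is first validated, e.g.
+ *
+ * nir_lower_outputs_to_temporaries(shader, main_function);
+ * nir_validate_shader(shader);
+ *
+ * where main_function is the producer's nir_function for the entry point.
+ */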
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_builder.h"
++#include "nir_control_flow.h"
++
++struct lower_returns_state {
++ nir_builder builder;
++ struct exec_list *cf_list;
++ nir_loop *loop;
++ nir_variable *return_flag;
++};
++
++static bool lower_returns_in_cf_list(struct exec_list *cf_list,
++ struct lower_returns_state *state);
++
++static void
++predicate_following(nir_cf_node *node, struct lower_returns_state *state)
++{
++ nir_builder *b = &state->builder;
++ b->cursor = nir_after_cf_node_and_phis(node);
++
++ if (nir_cursors_equal(b->cursor, nir_after_cf_list(state->cf_list)))
++ return; /* Nothing to predicate */
++
++ assert(state->return_flag);
++
++ nir_if *if_stmt = nir_if_create(b->shader);
++ if_stmt->condition = nir_src_for_ssa(nir_load_var(b, state->return_flag));
++ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
++
++ if (state->loop) {
++ /* If we're inside of a loop, then all we need to do is insert a
++ * conditional break.
++ */
++ nir_jump_instr *brk =
++ nir_jump_instr_create(state->builder.shader, nir_jump_break);
++ nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &brk->instr);
++ } else {
++ /* Otherwise, we need to actually move everything into the else case
++ * of the if statement.
++ */
++ nir_cf_list list;
++ nir_cf_extract(&list, nir_after_cf_node(&if_stmt->cf_node),
++ nir_after_cf_list(state->cf_list));
++ assert(!exec_list_is_empty(&list.list));
++ nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list));
++ }
++}
++
++static bool
++lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state)
++{
++ nir_loop *parent = state->loop;
++ state->loop = loop;
++ bool progress = lower_returns_in_cf_list(&loop->body, state);
++ state->loop = parent;
++
++ /* If the recursive call made progress, then there were returns inside
++ * of the loop. These would have been lowered to breaks with the return
++ * flag set to true. We need to predicate everything following the loop
++ * on the return flag.
++ */
++ if (progress)
++ predicate_following(&loop->cf_node, state);
++
++ return progress;
++}
++
++static bool
++lower_returns_in_if(nir_if *if_stmt, struct lower_returns_state *state)
++{
++ bool progress;
++
++ progress = lower_returns_in_cf_list(&if_stmt->then_list, state);
++ progress = lower_returns_in_cf_list(&if_stmt->else_list, state) || progress;
++
++ /* If either of the recursive calls made progress, then there were
++ * returns inside of the body of the if. If we're in a loop, then these
++ * were lowered to breaks which automatically skip to the end of the
++ * loop so we don't have to do anything. If we're not in a loop, then
++ * all we know is that the return flag is set appropriately and that the
++ * recursive calls ensured that nothing gets executed *inside* the if
++ * after a return. In order to ensure nothing outside gets executed
++ * after a return, we need to predicate everything following on the
++ * return flag.
++ */
++ if (progress && !state->loop)
++ predicate_following(&if_stmt->cf_node, state);
++
++ return progress;
++}
++
++static bool
++lower_returns_in_block(nir_block *block, struct lower_returns_state *state)
++{
++ if (block->predecessors->entries == 0 &&
++ block != nir_start_block(state->builder.impl)) {
++ /* This block is unreachable. Delete it and everything after it. */
++ nir_cf_list list;
++ nir_cf_extract(&list, nir_before_cf_node(&block->cf_node),
++ nir_after_cf_list(state->cf_list));
++
++ if (exec_list_is_empty(&list.list)) {
++ /* There's nothing here, which also means there's nothing in this
++ * block so we have nothing to do.
++ */
++ return false;
++ } else {
++ nir_cf_delete(&list);
++ return true;
++ }
++ }
++
++ nir_instr *last_instr = nir_block_last_instr(block);
++ if (last_instr == NULL)
++ return false;
++
++ if (last_instr->type != nir_instr_type_jump)
++ return false;
++
++ nir_jump_instr *jump = nir_instr_as_jump(last_instr);
++ if (jump->type != nir_jump_return)
++ return false;
++
++ nir_instr_remove(&jump->instr);
++
++ nir_builder *b = &state->builder;
++ b->cursor = nir_after_block(block);
++
++ /* Set the return flag */
++ if (state->return_flag == NULL) {
++ state->return_flag =
++ nir_local_variable_create(b->impl, glsl_bool_type(), "return");
++
++ /* Set a default value of false */
++ state->return_flag->constant_initializer =
++ rzalloc(state->return_flag, nir_constant);
++ }
++ nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE), 1);
++
++ if (state->loop) {
++ /* We're in a loop; we need to break out of it. */
++ nir_jump(b, nir_jump_break);
++ } else {
++ /* Not in a loop; we'll deal with predicating later */
++ assert(nir_cf_node_next(&block->cf_node) == NULL);
++ }
++
++ return true;
++}
++
++static bool
++lower_returns_in_cf_list(struct exec_list *cf_list,
++ struct lower_returns_state *state)
++{
++ bool progress = false;
++
++ struct exec_list *parent_list = state->cf_list;
++ state->cf_list = cf_list;
++
++ /* We iterate over the list backwards because any given lower call may
++ * take everything following the given CF node and predicate it. In
++ * order to avoid recursion/iteration problems, we want everything after
++ * a given node to already be lowered before this happens.
++ */
++ foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) {
++ switch (node->type) {
++ case nir_cf_node_block:
++ if (lower_returns_in_block(nir_cf_node_as_block(node), state))
++ progress = true;
++ break;
++
++ case nir_cf_node_if:
++ if (lower_returns_in_if(nir_cf_node_as_if(node), state))
++ progress = true;
++ break;
++
++ case nir_cf_node_loop:
++ if (lower_returns_in_loop(nir_cf_node_as_loop(node), state))
++ progress = true;
++ break;
++
++ default:
++ unreachable("Invalid inner CF node type");
++ }
++ }
++
++ state->cf_list = parent_list;
++
++ return progress;
++}
++
++bool
++nir_lower_returns_impl(nir_function_impl *impl)
++{
++ struct lower_returns_state state;
++
++ state.cf_list = &impl->body;
++ state.loop = NULL;
++ state.return_flag = NULL;
++ nir_builder_init(&state.builder, impl);
++
++ bool progress = lower_returns_in_cf_list(&impl->body, &state);
++
++ if (progress) {
++ nir_metadata_preserve(impl, nir_metadata_none);
++ nir_repair_ssa_impl(impl);
++ }
++
++ return progress;
++}
++
++bool
++nir_lower_returns(nir_shader *shader)
++{
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = nir_lower_returns_impl(function->impl) || progress;
++ }
++
++ return progress;
++}
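++
++/* Rough before/after sketch of the transformation (pseudocode, not literal
++ * NIR). Outside of any loop, a body such as
++ *
++ * if (cond)
++ * return;
++ * do_stuff();
++ *
++ * becomes
++ *
++ * if (cond)
++ * return_flag = true; (the flag defaults to false via its initializer)
++ * if (return_flag) {
++ * } else {
++ * do_stuff();
++ * }
++ *
++ * Inside a loop, the return instead becomes "return_flag = true; break;" and
++ * the code following the loop is predicated the same way.
++ */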
--- /dev/null
- instr->num_srcs + 1);
+ /*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+ #include "program/hash_table.h"
+ #include "compiler/glsl/ir_uniform.h"
+
+ #include "main/compiler.h"
+ #include "main/mtypes.h"
+ #include "program/prog_parameter.h"
+ #include "program/program.h"
+
+ /* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
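+ * For example, with "uniform sampler2D s[2][3]" a direct deref s[a][b]
+ * contributes a*3 + b to sampler_index, while an indirect index i in the
+ * outer dimension contributes i*3 to the dynamic offset instead.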
+ */
+ static void
+ calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+ unsigned *array_elements, nir_ssa_def **indirect,
+ nir_builder *b, unsigned *location)
+ {
+ if (tail->child == NULL)
+ return;
+
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+ assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ instr->sampler_index += deref_array->base_offset * *array_elements;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, *array_elements),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+ NIR_SRC_INIT);
+
+ if (*indirect) {
+ *indirect = nir_iadd(b, *indirect, mul);
+ } else {
+ *indirect = mul;
+ }
+ }
+
+ *array_elements *= glsl_get_length(tail->type);
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+ *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref type");
+ break;
+ }
+ }
+
+ static void
+ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, nir_builder *builder)
+ {
+ if (instr->sampler == NULL)
+ return;
+
++ /* GLSL only has combined textures/samplers */
++ assert(instr->texture == NULL);
++
+ instr->sampler_index = 0;
+ unsigned location = instr->sampler->var->data.location;
+ unsigned array_elements = 1;
+ nir_ssa_def *indirect = NULL;
+
+ builder->cursor = nir_before_instr(&instr->instr);
+ calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+ &indirect, builder, &location);
+
+ if (indirect) {
+ /* First, we have to resize the array of texture sources */
+ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
- instr->sampler_array_size = array_elements;
++ instr->num_srcs + 2);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ new_srcs[i].src_type = instr->src[i].src_type;
+ nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+ &instr->src[i].src);
+ }
+
+ ralloc_free(instr->src);
+ instr->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
++ instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset;
++ instr->num_srcs++;
++ nir_instr_rewrite_src(&instr->instr,
++ &instr->src[instr->num_srcs - 1].src,
++ nir_src_for_ssa(indirect));
++
+ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+ instr->num_srcs++;
+ nir_instr_rewrite_src(&instr->instr,
+ &instr->src[instr->num_srcs - 1].src,
+ nir_src_for_ssa(indirect));
+
++ instr->texture_array_size = array_elements;
+ }
+
+ if (location > shader_program->NumUniformStorage - 1 ||
+ !shader_program->UniformStorage[location].opaque[stage].active) {
+ assert(!"cannot return a sampler");
+ return;
+ }
+
+ instr->sampler_index +=
+ shader_program->UniformStorage[location].opaque[stage].index;
+
+ instr->sampler = NULL;
++
++ instr->texture_index = instr->sampler_index;
+ }
+
+ typedef struct {
+ nir_builder builder;
+ const struct gl_shader_program *shader_program;
+ gl_shader_stage stage;
+ } lower_state;
+
+ static bool
+ lower_block_cb(nir_block *block, void *_state)
+ {
+ lower_state *state = (lower_state *) _state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+ lower_sampler(tex_instr, state->shader_program, state->stage,
+ &state->builder);
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
+ {
+ lower_state state;
+
+ nir_builder_init(&state.builder, impl);
+ state.shader_program = shader_program;
+ state.stage = stage;
+
+ nir_foreach_block(impl, lower_block_cb, &state);
+ }
+
+ void
+ nir_lower_samplers(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ lower_impl(function->impl, shader_program, shader->stage);
+ }
+ }
--- /dev/null
- nir_intrinsic_op sysval_op =
- nir_intrinsic_from_system_value(var->data.location);
- nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0);
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+
+ struct lower_system_values_state {
+ nir_builder builder;
+ bool progress;
+ };
+
+ static bool
+ convert_block(nir_block *block, void *void_state)
+ {
+ struct lower_system_values_state *state = void_state;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr);
+
+ if (load_var->intrinsic != nir_intrinsic_load_var)
+ continue;
+
+ nir_variable *var = load_var->variables[0]->var;
+ if (var->data.mode != nir_var_system_value)
+ continue;
+
+ b->cursor = nir_after_instr(&load_var->instr);
+
++ nir_ssa_def *sysval;
++ switch (var->data.location) {
++ case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
++ /* From the GLSL man page for gl_GlobalInvocationID:
++ *
++ * "The value of gl_GlobalInvocationID is equal to
++ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
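++ *
++ * For example, with a local size of (8, 4, 2), work group ID (1, 0, 0) and
++ * local invocation ID (3, 2, 1), this gives (1*8 + 3, 0*4 + 2, 0*2 + 1) =
++ * (11, 2, 1).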
++ */
++
++ nir_const_value local_size;
++ local_size.u[0] = b->shader->info.cs.local_size[0];
++ local_size.u[1] = b->shader->info.cs.local_size[1];
++ local_size.u[2] = b->shader->info.cs.local_size[2];
++
++ nir_ssa_def *group_id =
++ nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
++ nir_ssa_def *local_id =
++ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
++
++ sysval = nir_iadd(b, nir_imul(b, group_id,
++ nir_build_imm(b, 3, local_size)),
++ local_id);
++ break;
++ }
++
++ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
++ /* From the GLSL man page for gl_LocalInvocationIndex:
++ *
++ * "The value of gl_LocalInvocationIndex is equal to
++ * gl_LocalInvocationID.z * gl_WorkGroupSize.x *
++ * gl_WorkGroupSize.y + gl_LocalInvocationID.y *
++ * gl_WorkGroupSize.x + gl_LocalInvocationID.x"
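++ *
++ * For example, with a local size of (8, 4, 2) and local invocation ID
++ * (3, 2, 1), this gives 1*8*4 + 2*8 + 3 = 51.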
++ */
++ nir_ssa_def *local_id =
++ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
++
++ unsigned stride_y = b->shader->info.cs.local_size[0];
++ unsigned stride_z = b->shader->info.cs.local_size[0] *
++ b->shader->info.cs.local_size[1];
++
++ sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2),
++ nir_imm_int(b, stride_z)),
++ nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1),
++ nir_imm_int(b, stride_y)),
++ nir_channel(b, local_id, 0)));
++ break;
++ }
++
++ case SYSTEM_VALUE_VERTEX_ID:
++ if (b->shader->options->vertex_id_zero_based) {
++ sysval = nir_iadd(b,
++ nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0),
++ nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0));
++ } else {
++ sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0);
++ }
++ break;
++
++ case SYSTEM_VALUE_INSTANCE_INDEX:
++ sysval = nir_iadd(b,
++ nir_load_system_value(b, nir_intrinsic_load_instance_id, 0),
++ nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
++ break;
++
++ default: {
++ nir_intrinsic_op sysval_op =
++ nir_intrinsic_from_system_value(var->data.location);
++ sysval = nir_load_system_value(b, sysval_op, 0);
++ break;
++ } /* default */
++ }
+
+ nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
+ nir_instr_remove(&load_var->instr);
+
+ state->progress = true;
+ }
+
+ return true;
+ }
+
+ static bool
+ convert_impl(nir_function_impl *impl)
+ {
+ struct lower_system_values_state state;
+
+ state.progress = false;
+ nir_builder_init(&state.builder, impl);
+
+ nir_foreach_block(impl, convert_block, &state);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ return state.progress;
+ }
+
+ bool
+ nir_lower_system_values(nir_shader *shader)
+ {
+ bool progress = false;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ progress = convert_impl(function->impl) || progress;
+ }
+
+ exec_list_make_empty(&shader->system_values);
+
+ return progress;
+ }
--- /dev/null
- nir_ssa_def **def_stack;
- nir_ssa_def **def_stack_tail;
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
++#include "nir_phi_builder.h"
+ #include "nir_vla.h"
+
+
+ struct deref_node {
+ struct deref_node *parent;
+ const struct glsl_type *type;
+
+ bool lower_to_ssa;
+
+ /* Only valid for things that end up in the direct list.
+ * Note that multiple nir_deref_vars may correspond to this node, but they
+ * will all be equivalent, so any is as good as the other.
+ */
+ nir_deref_var *deref;
+ struct exec_node direct_derefs_link;
+
+ struct set *loads;
+ struct set *stores;
+ struct set *copies;
+
- /* A hash table mapping phi nodes to deref_state data */
- struct hash_table *phi_table;
++ struct nir_phi_builder_value *pb_value;
+
+ struct deref_node *wildcard;
+ struct deref_node *indirect;
+ struct deref_node *children[0];
+ };
+
+ struct lower_variables_state {
+ nir_shader *shader;
+ void *dead_ctx;
+ nir_function_impl *impl;
+
+ /* A hash table mapping variables to deref_node data */
+ struct hash_table *deref_var_nodes;
+
+ /* A hash table mapping fully-qualified direct dereferences, i.e.
+ * dereferences with no indirect or wildcard array dereferences, to
+ * deref_node data.
+ *
+ * At the moment, we only lower loads, stores, and copies that can be
+ * trivially lowered to loads and stores, i.e. copies with no indirects
+ * and no wildcards. If a part of a variable that is being loaded from
+ * and/or stored into is also involved in a copy operation with
+ * wildcards, then we lower that copy operation to loads and stores, but
+ * otherwise we leave copies with wildcards alone. Since the only derefs
+ * used in these loads, stores, and trivial copies are ones with no
+ * wildcards and no indirects, these are precisely the derefs that we
+ * can actually consider lowering.
+ */
+ struct exec_list direct_deref_nodes;
+
+ /* Controls whether get_deref_node will add variables to the
+ * direct_deref_nodes table. This is turned on when we are initially
+ * scanning for load/store instructions. It is then turned off so we
+ * don't accidentally change the direct_deref_nodes table while we're
+ * iterating through it.
+ */
+ bool add_to_direct_deref_nodes;
+
-/** Pushes an SSA def onto the def stack for the given node
- *
- * Each node is potentially associated with a stack of SSA definitions.
- * This stack is used for determining what SSA definition reaches a given
- * point in the program for variable renaming. The stack is always kept in
- * dominance-order with at most one SSA def per block. If the SSA
- * definition on the top of the stack is in the same block as the one being
- * pushed, the top element is replaced.
- */
-static void
-def_stack_push(struct deref_node *node, nir_ssa_def *def,
- struct lower_variables_state *state)
-{
- if (node->def_stack == NULL) {
- node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
- state->impl->num_blocks);
- node->def_stack_tail = node->def_stack - 1;
- }
-
- if (node->def_stack_tail >= node->def_stack) {
- nir_ssa_def *top_def = *node->def_stack_tail;
-
- if (def->parent_instr->block == top_def->parent_instr->block) {
- /* They're in the same block, just replace the top */
- *node->def_stack_tail = def;
- return;
- }
- }
-
- *(++node->def_stack_tail) = def;
-}
-
-/* Pop the top of the def stack if it's in the given block */
-static void
-def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
-{
- /* If we're popping, then we have presumably pushed at some time in the
- * past so this should exist.
- */
- assert(node->def_stack != NULL);
-
- /* The stack is already empty. Do nothing. */
- if (node->def_stack_tail < node->def_stack)
- return;
-
- nir_ssa_def *def = *node->def_stack_tail;
- if (def->parent_instr->block == block)
- node->def_stack_tail--;
-}
-
-/** Retrieves the SSA definition on the top of the stack for the given
- * node, if one exists. If the stack is empty, then we return the constant
- * initializer (if it exists) or an SSA undef.
- */
-static nir_ssa_def *
-get_ssa_def_for_block(struct deref_node *node, nir_block *block,
- struct lower_variables_state *state)
-{
- /* If we have something on the stack, go ahead and return it. We're
- * assuming that the top of the stack dominates the given block.
- */
- if (node->def_stack && node->def_stack_tail >= node->def_stack)
- return *node->def_stack_tail;
-
- /* If we got here then we don't have a definition that dominates the
- * given block. This means that we need to add an undef and use that.
- */
- nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(state->shader,
- glsl_get_vector_elements(node->type));
- nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
- def_stack_push(node, &undef->def, state);
- return &undef->def;
-}
-
-/* Given a block and one of its predecessors, this function fills in the
- * souces of the phi nodes to take SSA defs from the given predecessor.
- * This function must be called exactly once per block/predecessor pair.
- */
-static void
-add_phi_sources(nir_block *block, nir_block *pred,
- struct lower_variables_state *state)
-{
- nir_foreach_instr(block, instr) {
- if (instr->type != nir_instr_type_phi)
- break;
-
- nir_phi_instr *phi = nir_instr_as_phi(instr);
-
- struct hash_entry *entry =
- _mesa_hash_table_search(state->phi_table, phi);
- if (!entry)
- continue;
-
- struct deref_node *node = entry->data;
-
- nir_phi_src *src = ralloc(phi, nir_phi_src);
- src->pred = pred;
- src->src.parent_instr = &phi->instr;
- src->src.is_ssa = true;
- src->src.ssa = get_ssa_def_for_block(node, pred, state);
-
- list_addtail(&src->src.use_link, &src->src.ssa->uses);
-
- exec_list_push_tail(&phi->srcs, &src->node);
- }
-}
-
++ struct nir_phi_builder *phi_builder;
+ };
+
+ static struct deref_node *
+ deref_node_create(struct deref_node *parent,
+ const struct glsl_type *type, nir_shader *shader)
+ {
+ size_t size = sizeof(struct deref_node) +
+ glsl_get_length(type) * sizeof(struct deref_node *);
+
+ struct deref_node *node = rzalloc_size(shader, size);
+ node->type = type;
+ node->parent = parent;
+ node->deref = NULL;
+ exec_node_init(&node->direct_derefs_link);
+
+ return node;
+ }
+
+ /* Returns the deref node associated with the given variable. This will be
+ * the root of the tree representing all of the derefs of the given variable.
+ */
+ static struct deref_node *
+ get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
+ {
+ struct deref_node *node;
+
+ struct hash_entry *var_entry =
+ _mesa_hash_table_search(state->deref_var_nodes, var);
+
+ if (var_entry) {
+ return var_entry->data;
+ } else {
+ node = deref_node_create(NULL, var->type, state->dead_ctx);
+ _mesa_hash_table_insert(state->deref_var_nodes, var, node);
+ return node;
+ }
+ }
+
+ /* Gets the deref_node for the given deref chain and creates it if it
+ * doesn't yet exist. If the deref is fully-qualified and direct and
+ * state->add_to_direct_deref_nodes is true, it will be added to the hash
+ * table of fully-qualified direct derefs.
+ */
+ static struct deref_node *
+ get_deref_node(nir_deref_var *deref, struct lower_variables_state *state)
+ {
+ bool is_direct = true;
+
+ /* Start at the base of the chain. */
+ struct deref_node *node = get_deref_node_for_var(deref->var, state);
+ assert(deref->deref.type == node->type);
+
+ for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+ switch (tail->deref_type) {
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+ assert(deref_struct->index < glsl_get_length(node->type));
+
+ if (node->children[deref_struct->index] == NULL)
+ node->children[deref_struct->index] =
+ deref_node_create(node, tail->type, state->dead_ctx);
+
+ node = node->children[deref_struct->index];
+ break;
+ }
+
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(tail);
+
+ switch (arr->deref_array_type) {
+ case nir_deref_array_type_direct:
+ /* This is possible if a loop unrolls and generates an
+ * out-of-bounds offset. We need to handle this at least
+ * somewhat gracefully.
+ */
+ if (arr->base_offset >= glsl_get_length(node->type))
+ return NULL;
+
+ if (node->children[arr->base_offset] == NULL)
+ node->children[arr->base_offset] =
+ deref_node_create(node, tail->type, state->dead_ctx);
+
+ node = node->children[arr->base_offset];
+ break;
+
+ case nir_deref_array_type_indirect:
+ if (node->indirect == NULL)
+ node->indirect = deref_node_create(node, tail->type,
+ state->dead_ctx);
+
+ node = node->indirect;
+ is_direct = false;
+ break;
+
+ case nir_deref_array_type_wildcard:
+ if (node->wildcard == NULL)
+ node->wildcard = deref_node_create(node, tail->type,
+ state->dead_ctx);
+
+ node = node->wildcard;
+ is_direct = false;
+ break;
+
+ default:
+ unreachable("Invalid array deref type");
+ }
+ break;
+ }
+ default:
+ unreachable("Invalid deref type");
+ }
+ }
+
+ assert(node);
+
+ /* Only insert if it isn't already in the list. */
+ if (is_direct && state->add_to_direct_deref_nodes &&
+ node->direct_derefs_link.next == NULL) {
+ node->deref = deref;
+ assert(deref->var != NULL);
+ exec_list_push_tail(&state->direct_deref_nodes,
+ &node->direct_derefs_link);
+ }
+
+ return node;
+ }
+
+ /* \sa foreach_deref_node_match */
+ static bool
+ foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
+ bool (* cb)(struct deref_node *node,
+ struct lower_variables_state *state),
+ struct lower_variables_state *state)
+ {
+ if (deref->child == NULL) {
+ return cb(node, state);
+ } else {
+ switch (deref->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(deref->child);
+ assert(arr->deref_array_type == nir_deref_array_type_direct);
+ if (node->children[arr->base_offset] &&
+ !foreach_deref_node_worker(node->children[arr->base_offset],
+ deref->child, cb, state))
+ return false;
+
+ if (node->wildcard &&
+ !foreach_deref_node_worker(node->wildcard,
+ deref->child, cb, state))
+ return false;
+
+ return true;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *str = nir_deref_as_struct(deref->child);
+ return foreach_deref_node_worker(node->children[str->index],
+ deref->child, cb, state);
+ }
+
+ default:
+ unreachable("Invalid deref child type");
+ }
+ }
+ }
+
+ /* Walks over every "matching" deref_node and calls the callback. A node
+ * is considered to "match" if it either refers to that deref or matches up
+ * to a wildcard. In other words, the following would match a[6].foo[3].bar:
+ *
+ * a[6].foo[3].bar
+ * a[*].foo[3].bar
+ * a[6].foo[*].bar
+ * a[*].foo[*].bar
+ *
+ * The given deref must be a full-length and fully qualified (no wildcards
+ * or indirects) deref chain.
+ */
+ static bool
+ foreach_deref_node_match(nir_deref_var *deref,
+ bool (* cb)(struct deref_node *node,
+ struct lower_variables_state *state),
+ struct lower_variables_state *state)
+ {
+ nir_deref_var var_deref = *deref;
+ var_deref.deref.child = NULL;
+ struct deref_node *node = get_deref_node(&var_deref, state);
+
+ if (node == NULL)
+ return false;
+
+ return foreach_deref_node_worker(node, &deref->deref, cb, state);
+ }
+
+ /* \sa deref_may_be_aliased */
+ static bool
+ deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
+ struct lower_variables_state *state)
+ {
+ if (deref->child == NULL) {
+ return false;
+ } else {
+ switch (deref->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(deref->child);
+ if (arr->deref_array_type == nir_deref_array_type_indirect)
+ return true;
+
+ /* If there is an indirect at this level, we're aliased. */
+ if (node->indirect)
+ return true;
+
+ assert(arr->deref_array_type == nir_deref_array_type_direct);
+
+ if (node->children[arr->base_offset] &&
+ deref_may_be_aliased_node(node->children[arr->base_offset],
+ deref->child, state))
+ return true;
+
+ if (node->wildcard &&
+ deref_may_be_aliased_node(node->wildcard, deref->child, state))
+ return true;
+
+ return false;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *str = nir_deref_as_struct(deref->child);
+ if (node->children[str->index]) {
+ return deref_may_be_aliased_node(node->children[str->index],
+ deref->child, state);
+ } else {
+ return false;
+ }
+ }
+
+ default:
+ unreachable("Invalid nir_deref child type");
+ }
+ }
+ }
+
+ /* Returns true if there are no indirects that can ever touch this deref.
+ *
+ * For example, if the given deref is a[6].foo, then any uses of a[i].foo
+ * would cause this to return false, but a[i].bar would not affect it
+ * because it's a different structure member. A var_copy involving
+ * a[*].bar also doesn't affect it because that can be lowered to entirely
+ * direct load/stores.
+ *
+ * We only support asking this question about fully-qualified derefs.
+ * Obviously, it's pointless to ask this about indirects, but we also
+ * rule out wildcards. Handling wildcard dereferences would involve
+ * checking each array index to make sure that there aren't any indirect
+ * references.
+ */
+ static bool
+ deref_may_be_aliased(nir_deref_var *deref,
+ struct lower_variables_state *state)
+ {
+ return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state),
+ &deref->deref, state);
+ }
+
+ static void
+ register_load_instr(nir_intrinsic_instr *load_instr,
+ struct lower_variables_state *state)
+ {
+ struct deref_node *node = get_deref_node(load_instr->variables[0], state);
+ if (node == NULL)
+ return;
+
+ if (node->loads == NULL)
+ node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ _mesa_set_add(node->loads, load_instr);
+ }
+
+ static void
+ register_store_instr(nir_intrinsic_instr *store_instr,
+ struct lower_variables_state *state)
+ {
+ struct deref_node *node = get_deref_node(store_instr->variables[0], state);
+ if (node == NULL)
+ return;
+
+ if (node->stores == NULL)
+ node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ _mesa_set_add(node->stores, store_instr);
+ }
+
+ static void
+ register_copy_instr(nir_intrinsic_instr *copy_instr,
+ struct lower_variables_state *state)
+ {
+ for (unsigned idx = 0; idx < 2; idx++) {
+ struct deref_node *node =
+ get_deref_node(copy_instr->variables[idx], state);
+
+ if (node == NULL)
+ continue;
+
+ if (node->copies == NULL)
+ node->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ _mesa_set_add(node->copies, copy_instr);
+ }
+ }
+
+ /* Registers all variable uses in the given block. */
+ static bool
+ register_variable_uses_block(nir_block *block, void *void_state)
+ {
+ struct lower_variables_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var:
+ register_load_instr(intrin, state);
+ break;
+
+ case nir_intrinsic_store_var:
+ register_store_instr(intrin, state);
+ break;
+
+ case nir_intrinsic_copy_var:
+ register_copy_instr(intrin, state);
+ break;
+
+ default:
+ continue;
+ }
+ }
+
+ return true;
+ }
+
+ /* Walks over all of the copy instructions to or from the given deref_node
+ * and lowers them to load/store intrinsics.
+ */
+ static bool
+ lower_copies_to_load_store(struct deref_node *node,
+ struct lower_variables_state *state)
+ {
+ if (!node->copies)
+ return true;
+
+ struct set_entry *copy_entry;
+ set_foreach(node->copies, copy_entry) {
+ nir_intrinsic_instr *copy = (void *)copy_entry->key;
+
+ nir_lower_var_copy_instr(copy, state->shader);
+
+ for (unsigned i = 0; i < 2; ++i) {
+ struct deref_node *arg_node =
+ get_deref_node(copy->variables[i], state);
+
+ /* Only bother removing copy entries for other nodes */
+ if (arg_node == NULL || arg_node == node)
+ continue;
+
+ struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
+ assert(arg_entry);
+ _mesa_set_remove(node->copies, arg_entry);
+ }
+
+ nir_instr_remove(&copy->instr);
+ }
+
+ node->copies = NULL;
+
+ return true;
+ }
+
- if (instr->type == nir_instr_type_phi) {
- nir_phi_instr *phi = nir_instr_as_phi(instr);
-
- struct hash_entry *entry =
- _mesa_hash_table_search(state->phi_table, phi);
-
- /* This can happen if we already have phi nodes in the program
- * that were not created in this pass.
- */
- if (!entry)
- continue;
-
- struct deref_node *node = entry->data;
-
- def_stack_push(node, &phi->dest.ssa, state);
- } else if (instr->type == nir_instr_type_intrinsic) {
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
- switch (intrin->intrinsic) {
- case nir_intrinsic_load_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
-
- if (node == NULL) {
- /* If we hit this path then we are referencing an invalid
- * value. Most likely, we unrolled something and are
- * reading past the end of some array. In any case, this
- * should result in an undefined value.
- */
- nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(state->shader,
- intrin->num_components);
-
- nir_instr_insert_before(&intrin->instr, &undef->instr);
- nir_instr_remove(&intrin->instr);
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&undef->def));
- continue;
- }
-
- if (!node->lower_to_ssa)
- continue;
-
- nir_alu_instr *mov = nir_alu_instr_create(state->shader,
- nir_op_imov);
- mov->src[0].src.is_ssa = true;
- mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
- for (unsigned i = intrin->num_components; i < 4; i++)
- mov->src[0].swizzle[i] = 0;
+ /* Performs variable renaming by doing a DFS of the dominance tree
+ *
+ * This algorithm is very similar to the one outlined in "Efficiently
+ * Computing Static Single Assignment Form and the Control Dependence
+ * Graph" by Cytron et. al. The primary difference is that we only put one
+ * SSA def on the stack per block.
+ */
+ static bool
+ rename_variables_block(nir_block *block, struct lower_variables_state *state)
+ {
+ nir_builder b;
+ nir_builder_init(&b, state->impl);
+
+ nir_foreach_instr_safe(block, instr) {
- assert(intrin->dest.is_ssa);
++ if (instr->type != nir_instr_type_intrinsic)
++ continue;
+
- mov->dest.write_mask = (1 << intrin->num_components) - 1;
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
++ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
- nir_instr_insert_before(&intrin->instr, &mov->instr);
++ switch (intrin->intrinsic) {
++ case nir_intrinsic_load_var: {
++ struct deref_node *node =
++ get_deref_node(intrin->variables[0], state);
++
++ if (node == NULL) {
++ /* If we hit this path then we are referencing an invalid
++ * value. Most likely, we unrolled something and are
++ * reading past the end of some array. In any case, this
++ * should result in an undefined value.
++ */
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(state->shader,
++ intrin->num_components);
+
- nir_src_for_ssa(&mov->dest.dest.ssa));
- break;
++ nir_instr_insert_before(&intrin->instr, &undef->instr);
+ nir_instr_remove(&intrin->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- case nir_intrinsic_store_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
++ nir_src_for_ssa(&undef->def));
++ continue;
+ }
+
- if (node == NULL) {
- /* Probably an out-of-bounds array store. That should be a
- * no-op. */
- nir_instr_remove(&intrin->instr);
- continue;
- }
++ if (!node->lower_to_ssa)
++ continue;
+
- if (!node->lower_to_ssa)
- continue;
-
- assert(intrin->num_components ==
- glsl_get_vector_elements(node->type));
-
- assert(intrin->src[0].is_ssa);
-
- nir_ssa_def *new_def;
- b.cursor = nir_before_instr(&intrin->instr);
-
- if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
- /* Whole variable store - just copy the source. Note that
- * intrin->num_components and intrin->src[0].ssa->num_components
- * may differ.
- */
- unsigned swiz[4];
- for (unsigned i = 0; i < 4; i++)
- swiz[i] = i < intrin->num_components ? i : 0;
-
- new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
- intrin->num_components, false);
- } else {
- nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
- /* For writemasked store_var intrinsics, we combine the newly
- * written values with the existing contents of unwritten
- * channels, creating a new SSA value for the whole vector.
- */
- nir_ssa_def *srcs[4];
- for (unsigned i = 0; i < intrin->num_components; i++) {
- if (intrin->const_index[0] & (1 << i)) {
- srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
- } else {
- srcs[i] = nir_channel(&b, old_def, i);
- }
- }
- new_def = nir_vec(&b, srcs, intrin->num_components);
- }
-
- assert(new_def->num_components == intrin->num_components);
++ nir_alu_instr *mov = nir_alu_instr_create(state->shader,
++ nir_op_imov);
++ mov->src[0].src = nir_src_for_ssa(
++ nir_phi_builder_value_get_block_def(node->pb_value, block));
++ for (unsigned i = intrin->num_components; i < 4; i++)
++ mov->src[0].swizzle[i] = 0;
+
- def_stack_push(node, new_def, state);
++ assert(intrin->dest.is_ssa);
+
- /* We'll wait to remove the instruction until the next pass
- * where we pop the node we just pushed back off the stack.
- */
- break;
- }
++ mov->dest.write_mask = (1 << intrin->num_components) - 1;
++ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
++ intrin->num_components, NULL);
+
- default:
- break;
- }
++ nir_instr_insert_before(&intrin->instr, &mov->instr);
++ nir_instr_remove(&intrin->instr);
+
- }
-
- if (block->successors[0])
- add_phi_sources(block->successors[0], block, state);
- if (block->successors[1])
- add_phi_sources(block->successors[1], block, state);
-
- for (unsigned i = 0; i < block->num_dom_children; ++i)
- rename_variables_block(block->dom_children[i], state);
-
- /* Now we iterate over the instructions and pop off any SSA defs that we
- * pushed in the first loop.
- */
- nir_foreach_instr_safe(block, instr) {
- if (instr->type == nir_instr_type_phi) {
- nir_phi_instr *phi = nir_instr_as_phi(instr);
-
- struct hash_entry *entry =
- _mesa_hash_table_search(state->phi_table, phi);
-
- /* This can happen if we already have phi nodes in the program
- * that were not created in this pass.
- */
- if (!entry)
- continue;
-
- struct deref_node *node = entry->data;
++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
++ nir_src_for_ssa(&mov->dest.dest.ssa));
++ break;
+ }
- def_stack_pop_if_in_block(node, block);
- } else if (instr->type == nir_instr_type_intrinsic) {
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
- if (intrin->intrinsic != nir_intrinsic_store_var)
- continue;
-
- struct deref_node *node = get_deref_node(intrin->variables[0], state);
- if (!node)
++ case nir_intrinsic_store_var: {
++ struct deref_node *node =
++ get_deref_node(intrin->variables[0], state);
+
- def_stack_pop_if_in_block(node, block);
- nir_instr_remove(&intrin->instr);
- }
- }
-
- return true;
-}
-
-/* Inserts phi nodes for all variables marked lower_to_ssa
- *
- * This is the same algorithm as presented in "Efficiently Computing Static
- * Single Assignment Form and the Control Dependence Graph" by Cytron et.
- * al.
- */
-static void
-insert_phi_nodes(struct lower_variables_state *state)
-{
- NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks);
- NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks);
-
- /*
- * Since the work flags already prevent us from inserting a node that has
- * ever been inserted into W, we don't need to use a set to represent W.
- * Also, since no block can ever be inserted into W more than once, we know
- * that the maximum size of W is the number of basic blocks in the
- * function. So all we need to handle W is an array and a pointer to the
- * next element to be inserted and the next element to be removed.
- */
- NIR_VLA(nir_block *, W, state->impl->num_blocks);
-
- unsigned w_start, w_end;
- unsigned iter_count = 0;
-
- foreach_list_typed(struct deref_node, node, direct_derefs_link,
- &state->direct_deref_nodes) {
- if (node->stores == NULL)
- continue;
++ if (node == NULL) {
++ /* Probably an out-of-bounds array store. That should be a
++ * no-op. */
++ nir_instr_remove(&intrin->instr);
+ continue;
++ }
+
+ if (!node->lower_to_ssa)
+ continue;
+
- if (!node->lower_to_ssa)
- continue;
++ assert(intrin->num_components ==
++ glsl_get_vector_elements(node->type));
+
- w_start = w_end = 0;
- iter_count++;
++ assert(intrin->src[0].is_ssa);
+
- struct set_entry *store_entry;
- set_foreach(node->stores, store_entry) {
- nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
- if (work[store->instr.block->index] < iter_count)
- W[w_end++] = store->instr.block;
- work[store->instr.block->index] = iter_count;
- }
-
- while (w_start != w_end) {
- nir_block *cur = W[w_start++];
- struct set_entry *dom_entry;
- set_foreach(cur->dom_frontier, dom_entry) {
- nir_block *next = (nir_block *) dom_entry->key;
-
- /*
- * If there's more than one return statement, then the end block
- * can be a join point for some definitions. However, there are
- * no instructions in the end block, so nothing would use those
- * phi nodes. Of course, we couldn't place those phi nodes
- * anyways due to the restriction of having no instructions in the
- * end block...
++ nir_ssa_def *new_def;
++ b.cursor = nir_before_instr(&intrin->instr);
+
- if (next == state->impl->end_block)
- continue;
-
- if (has_already[next->index] < iter_count) {
- nir_phi_instr *phi = nir_phi_instr_create(state->shader);
- nir_ssa_dest_init(&phi->instr, &phi->dest,
- glsl_get_vector_elements(node->type), NULL);
- nir_instr_insert_before_block(next, &phi->instr);
++ if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
++ /* Whole variable store - just copy the source. Note that
++ * intrin->num_components and intrin->src[0].ssa->num_components
++ * may differ.
+ */
- _mesa_hash_table_insert(state->phi_table, phi, node);
-
- has_already[next->index] = iter_count;
- if (work[next->index] < iter_count) {
- work[next->index] = iter_count;
- W[w_end++] = next;
++ unsigned swiz[4];
++ for (unsigned i = 0; i < 4; i++)
++ swiz[i] = i < intrin->num_components ? i : 0;
+
-}
++ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
++ intrin->num_components, false);
++ } else {
++ nir_ssa_def *old_def =
++ nir_phi_builder_value_get_block_def(node->pb_value, block);
++ /* For writemasked store_var intrinsics, we combine the newly
++ * written values with the existing contents of unwritten
++ * channels, creating a new SSA value for the whole vector.
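++ * For example, a vec4 store with write mask 0x5 produces
++ * vec4(new.x, old.y, new.z, old.w).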
++ */
++ nir_ssa_def *srcs[4];
++ for (unsigned i = 0; i < intrin->num_components; i++) {
++ if (intrin->const_index[0] & (1 << i)) {
++ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
++ } else {
++ srcs[i] = nir_channel(&b, old_def, i);
+ }
+ }
++ new_def = nir_vec(&b, srcs, intrin->num_components);
+ }
++
++ assert(new_def->num_components == intrin->num_components);
++
++ nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
++ nir_instr_remove(&intrin->instr);
++ break;
++ }
++
++ default:
++ break;
+ }
+ }
- state.phi_table = _mesa_hash_table_create(state.dead_ctx,
- _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+
++ for (unsigned i = 0; i < block->num_dom_children; ++i)
++ rename_variables_block(block->dom_children[i], state);
++
++ return true;
++}
+
+ /** Implements a pass to lower variable uses to SSA values
+ *
+ * This path walks the list of instructions and tries to lower as many
+ * local variable load/store operations to SSA defs and uses as it can.
+ * The process involves four passes:
+ *
+ * 1) Iterate over all of the instructions and mark where each local
+ * variable deref is used in a load, store, or copy. While we're at
+ * it, we keep track of all of the fully-qualified (no wildcards) and
+ * fully-direct references we see and store them in the
+ * direct_deref_nodes hash table.
+ *
+ * 2) Walk over the list of fully-qualified direct derefs generated in
+ * the previous pass. For each deref, we determine if it can ever be
+ * aliased, i.e. if there is an indirect reference anywhere that may
+ * refer to it. If it cannot be aliased, we mark it for lowering to an
+ * SSA value. At this point, we lower any var_copy instructions that
+ * use the given deref to load/store operations and, if the deref has a
+ * constant initializer, we go ahead and add a load_const value at the
+ * beginning of the function with the initialized value.
+ *
+ * 3) Walk over the list of derefs we plan to lower to SSA values and
+ * insert phi nodes as needed.
+ *
+ * 4) Perform "variable renaming" by replacing the load/store instructions
+ * with SSA definitions and SSA uses.
+ */
+ static bool
+ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
+ {
+ struct lower_variables_state state;
+
+ state.shader = impl->function->shader;
+ state.dead_ctx = ralloc_context(state.shader);
+ state.impl = impl;
+
+ state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ exec_list_make_empty(&state.direct_deref_nodes);
- if (deref->var->constant_initializer) {
- nir_load_const_instr *load =
- nir_deref_get_const_initializer_load(state.shader, deref);
- nir_ssa_def_init(&load->instr, &load->def,
- glsl_get_vector_elements(node->type), NULL);
- nir_instr_insert_before_cf_list(&impl->body, &load->instr);
- def_stack_push(node, &load->def, &state);
- }
-
+
+ /* Build the initial deref structures and direct_deref_nodes table */
+ state.add_to_direct_deref_nodes = true;
+ nir_foreach_block(impl, register_variable_uses_block, &state);
+
+ bool progress = false;
+
+ nir_metadata_require(impl, nir_metadata_block_index);
+
+ /* We're about to iterate through direct_deref_nodes. Don't modify it. */
+ state.add_to_direct_deref_nodes = false;
+
+ foreach_list_typed_safe(struct deref_node, node, direct_derefs_link,
+ &state.direct_deref_nodes) {
+ nir_deref_var *deref = node->deref;
+
+ if (deref->var->data.mode != nir_var_local) {
+ exec_node_remove(&node->direct_derefs_link);
+ continue;
+ }
+
+ if (deref_may_be_aliased(deref, &state)) {
+ exec_node_remove(&node->direct_derefs_link);
+ continue;
+ }
+
+ node->lower_to_ssa = true;
+ progress = true;
+
- insert_phi_nodes(&state);
+ foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
+ }
+
+ if (!progress)
+ return false;
+
+ nir_metadata_require(impl, nir_metadata_dominance);
+
+ /* We may have lowered some copy instructions to load/store
+ * instructions. The uses from the copy instructions have already been
+ * removed but we need to rescan to ensure that the uses from the newly
+ * added load/store instructions are registered. We need this
+ * information for phi node insertion below.
+ */
+ nir_foreach_block(impl, register_variable_uses_block, &state);
+
++ state.phi_builder = nir_phi_builder_create(state.impl);
++
++ NIR_VLA(BITSET_WORD, store_blocks, BITSET_WORDS(state.impl->num_blocks));
++ foreach_list_typed(struct deref_node, node, direct_derefs_link,
++ &state.direct_deref_nodes) {
++ if (!node->lower_to_ssa)
++ continue;
++
++ memset(store_blocks, 0,
++ BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks));
++
++ if (node->stores) {
++ struct set_entry *store_entry;
++ set_foreach(node->stores, store_entry) {
++ nir_intrinsic_instr *store =
++ (nir_intrinsic_instr *)store_entry->key;
++ BITSET_SET(store_blocks, store->instr.block->index);
++ }
++ }
++
++ if (node->deref->var->constant_initializer)
++ BITSET_SET(store_blocks, 0);
++
++ node->pb_value =
++ nir_phi_builder_add_value(state.phi_builder,
++ glsl_get_vector_elements(node->type),
++ store_blocks);
++
++ if (node->deref->var->constant_initializer) {
++ nir_load_const_instr *load =
++ nir_deref_get_const_initializer_load(state.shader, node->deref);
++ nir_instr_insert_before_cf_list(&impl->body, &load->instr);
++ nir_phi_builder_value_set_block_def(node->pb_value,
++ nir_start_block(impl), &load->def);
++ }
++ }
++
+ rename_variables_block(nir_start_block(impl), &state);
+
++ nir_phi_builder_finish(state.phi_builder);
++
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ ralloc_free(state.dead_ctx);
+
+ return progress;
+ }
+
+ void
+ nir_lower_vars_to_ssa(nir_shader *shader)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_lower_vars_to_ssa_impl(function->impl);
+ }
+ }
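The pass above uses the textbook iterated-dominance-frontier construction for step 3 (phi placement), now delegated to the phi builder. A minimal sketch in Python, assuming hypothetical def_blocks (the set of blocks that store to the value) and dom_frontier (a map from each block to its dominance-frontier set); this is an illustration, not part of the patch:

def place_phis(def_blocks, dom_frontier):
    # Worklist over dominance frontiers: every block in the frontier of a
    # defining block needs a phi, and a phi counts as a new definition,
    # so that block goes back on the worklist.
    needs_phi = set()
    work = list(def_blocks)
    while work:
        block = work.pop()
        for candidate in dom_frontier[block]:
            if candidate not in needs_phi:
                needs_phi.add(candidate)
                work.append(candidate)
    return needs_phi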
--- /dev/null
-def unop_convert(name, in_type, out_type, const_expr):
+ #! /usr/bin/env python
+ #
+ # Copyright (C) 2014 Connor Abbott
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ #
+ # Authors:
+ # Connor Abbott (cwabbott0@gmail.com)
+
+
+ # Class that represents all the information we have about the opcode
+ # NOTE: this must be kept in sync with nir_op_info
+
+ class Opcode(object):
+ """Class that represents all the information we have about the opcode
+ NOTE: this must be kept in sync with nir_op_info
+ """
+ def __init__(self, name, output_size, output_type, input_sizes,
+ input_types, algebraic_properties, const_expr):
+ """Parameters:
+
+ - name is the name of the opcode (prepend nir_op_ for the enum name)
+ - all types are strings that get nir_type_ prepended to them
+ - input_types is a list of types
+ - algebraic_properties is a space-separated string, where nir_op_is_ is
+ prepended before each entry
+ - const_expr is an expression or series of statements that computes the
+ constant value of the opcode given the constant values of its inputs.
+
+ Constant expressions are formed from the variables src0, src1, ...,
+ src(N-1), where N is the number of arguments. The output of the
+ expression should be stored in the dst variable. Per-component input
+ and output variables will be scalars and non-per-component input and
+ output variables will be a struct with fields named x, y, z, and w
+ all of the correct type. Input and output variables can be assumed
+ to already be of the correct type and need no conversion. In
+ particular, the conversion from the C bool type to/from NIR_TRUE and
+ NIR_FALSE happens automatically.
+
+ For per-component instructions, the entire expression will be
+ executed once for each component. For non-per-component
+ instructions, the expression is expected to store the correct values
+ in dst.x, dst.y, etc. If "dst" does not exist anywhere in the
+ constant expression, an assignment to dst will happen automatically
+ and the result will be equivalent to "dst = <expression>" for
+ per-component instructions and "dst.x = dst.y = ... = <expression>"
+ for non-per-component instructions.
+ """
+ assert isinstance(name, str)
+ assert isinstance(output_size, int)
+ assert isinstance(output_type, str)
+ assert isinstance(input_sizes, list)
+ assert isinstance(input_sizes[0], int)
+ assert isinstance(input_types, list)
+ assert isinstance(input_types[0], str)
+ assert isinstance(algebraic_properties, str)
+ assert isinstance(const_expr, str)
+ assert len(input_sizes) == len(input_types)
+ assert 0 <= output_size <= 4
+ for size in input_sizes:
+ assert 0 <= size <= 4
+ if output_size != 0:
+ assert size != 0
+ self.name = name
+ self.num_inputs = len(input_sizes)
+ self.output_size = output_size
+ self.output_type = output_type
+ self.input_sizes = input_sizes
+ self.input_types = input_types
+ self.algebraic_properties = algebraic_properties
+ self.const_expr = const_expr
+
+ # helper variables for strings
+ tfloat = "float"
+ tint = "int"
+ tbool = "bool"
+ tuint = "uint"
+
+ commutative = "commutative "
+ associative = "associative "
+
+ # global dictionary of opcodes
+ opcodes = {}
+
+ def opcode(name, output_size, output_type, input_sizes, input_types,
+ algebraic_properties, const_expr):
+ assert name not in opcodes
+ opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
+ input_types, algebraic_properties, const_expr)
+
-unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
-unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
++def unop_convert(name, out_type, in_type, const_expr):
+ opcode(name, 0, out_type, [0], [in_type], "", const_expr)
+
+ def unop(name, ty, const_expr):
+ opcode(name, 0, ty, [0], [ty], "", const_expr)
+
+ def unop_horiz(name, output_size, output_type, input_size, input_type,
+ const_expr):
+ opcode(name, output_size, output_type, [input_size], [input_type], "",
+ const_expr)
+
+ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
+ reduce_expr, final_expr):
+ def prereduce(src):
+ return "(" + prereduce_expr.format(src=src) + ")"
+ def final(src):
+ return final_expr.format(src="(" + src + ")")
+ def reduce_(src0, src1):
+ return reduce_expr.format(src0=src0, src1=src1)
+ src0 = prereduce("src0.x")
+ src1 = prereduce("src0.y")
+ src2 = prereduce("src0.z")
+ src3 = prereduce("src0.w")
+ unop_horiz(name + "2", output_size, output_type, 2, input_type,
+ final(reduce_(src0, src1)))
+ unop_horiz(name + "3", output_size, output_type, 3, input_type,
+ final(reduce_(reduce_(src0, src1), src2)))
+ unop_horiz(name + "4", output_size, output_type, 4, input_type,
+ final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+
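To make the const_expr conventions in the Opcode docstring concrete, here is a hedged, hypothetical definition (illustration only, not one of the opcodes below): a horizontal two-component op whose expression writes dst.x and dst.y explicitly, as non-per-component instructions must.

# Hypothetical example only -- not a real NIR opcode.
unop_horiz("example_swap2", 2, tfloat, 2, tfloat, """
dst.x = src0.y;
dst.y = src0.x;
""")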
+ # These two move instructions differ in what modifiers they support and what
+ # the negate modifier means. Otherwise, they are identical.
+ unop("fmov", tfloat, "src0")
+ unop("imov", tint, "src0")
+
+ unop("ineg", tint, "-src0")
+ unop("fneg", tfloat, "-src0")
+ unop("inot", tint, "~src0") # invert every bit of the integer
+ unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+ unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+ unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
+ unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
+ unop("fabs", tfloat, "fabsf(src0)")
+ unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+ unop("frcp", tfloat, "1.0f / src0")
+ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+ unop("fsqrt", tfloat, "sqrtf(src0)")
+ unop("fexp2", tfloat, "exp2f(src0)")
+ unop("flog2", tfloat, "log2f(src0)")
-unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
++unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
++unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
++unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
+ # Float-to-boolean conversion
-unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
++unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
+ # Boolean-to-float conversion
-unop_convert("i2b", tint, tbool, "src0 != 0")
-unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
++unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
+ # Int-to-boolean conversion
-unop_convert("ufind_msb", tuint, tint, """
++unop_convert("i2b", tbool, tint, "src0 != 0")
++unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
++unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
+
+ # Unary floating-point rounding operations.
+
+
+ unop("ftrunc", tfloat, "truncf(src0)")
+ unop("fceil", tfloat, "ceilf(src0)")
+ unop("ffloor", tfloat, "floorf(src0)")
+ unop("ffract", tfloat, "src0 - floorf(src0)")
+ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+
++unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
+
+ # Trigonometric operations.
+
+
+ unop("fsin", tfloat, "sinf(src0)")
+ unop("fcos", tfloat, "cosf(src0)")
+
+
+ # Partial derivatives.
+
+
+ unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+ unop("fddy", tfloat, "0.0f")
+ unop("fddx_fine", tfloat, "0.0f")
+ unop("fddy_fine", tfloat, "0.0f")
+ unop("fddx_coarse", tfloat, "0.0f")
+ unop("fddy_coarse", tfloat, "0.0f")
+
+
+ # Floating point pack and unpack operations.
+
+ def pack_2x16(fmt):
+ unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
+ dst.x = (uint32_t) pack_fmt_1x16(src0.x);
+ dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
+ """.replace("fmt", fmt))
+
+ def pack_4x8(fmt):
+ unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
+ dst.x = (uint32_t) pack_fmt_1x8(src0.x);
+ dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
+ dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
+ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
+ """.replace("fmt", fmt))
+
+ def unpack_2x16(fmt):
+ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+ dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
+ dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
+ """.replace("fmt", fmt))
+
+ def unpack_4x8(fmt):
+ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
+ dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
+ dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
+ dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
+ dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
+ """.replace("fmt", fmt))
+
+
+ pack_2x16("snorm")
+ pack_4x8("snorm")
+ pack_2x16("unorm")
+ pack_4x8("unorm")
+ pack_2x16("half")
+ unpack_2x16("snorm")
+ unpack_4x8("snorm")
+ unpack_2x16("unorm")
+ unpack_4x8("unorm")
+ unpack_2x16("half")
+
++unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
++dst = (src0.x & 0xffff) | (src0.y << 16);
++""")
++
++unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
++dst = (src0.x << 0) |
++ (src0.y << 8) |
++ (src0.z << 16) |
++ (src0.w << 24);
++""")
+
+ # Lowered floating point unpacking operations.
+
+
+ unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+ "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
+ unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+ "unpack_half_1x16((uint16_t)(src0.x >> 16))")
+
+
+ # Bit operations, part of ARB_gpu_shader5.
+
+
+ unop("bitfield_reverse", tuint, """
+ /* we're not winning any awards for speed here, but that's ok */
+ dst = 0;
+ for (unsigned bit = 0; bit < 32; bit++)
+ dst |= ((src0 >> bit) & 1) << (31 - bit);
+ """)
+ unop("bit_count", tuint, """
+ dst = 0;
+ for (unsigned bit = 0; bit < 32; bit++) {
+ if ((src0 >> bit) & 1)
+ dst++;
+ }
+ """)
+
-binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
++unop_convert("ufind_msb", tint, tuint, """
+ dst = -1;
+ for (int bit = 31; bit >= 0; bit--) {
+ if ((src0 >> bit) & 1) {
+ dst = bit;
+ break;
+ }
+ }
+ """)
+
+ unop("ifind_msb", tint, """
+ dst = -1;
+ for (int bit = 31; bit >= 0; bit--) {
+ /* If src0 < 0, we're looking for the first 0 bit.
+ * if src0 >= 0, we're looking for the first 1 bit.
+ */
+ if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
+ (!((src0 >> bit) & 1) && (src0 < 0))) {
+ dst = bit;
+ break;
+ }
+ }
+ """)
+
+ unop("find_lsb", tint, """
+ dst = -1;
+ for (unsigned bit = 0; bit < 32; bit++) {
+ if ((src0 >> bit) & 1) {
+ dst = bit;
+ break;
+ }
+ }
+ """)
+
+
+ for i in xrange(1, 5):
+ for j in xrange(1, 5):
+ unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
+
+ def binop_convert(name, out_type, in_type, alg_props, const_expr):
+ opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
+
+ def binop(name, ty, alg_props, const_expr):
+ binop_convert(name, ty, ty, alg_props, const_expr)
+
+ def binop_compare(name, ty, alg_props, const_expr):
+ binop_convert(name, tbool, ty, alg_props, const_expr)
+
+ def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
+ src2_type, const_expr):
+ opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
+ "", const_expr)
+
+ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
+ reduce_expr, final_expr):
+ def final(src):
+ return final_expr.format(src= "(" + src + ")")
+ def reduce_(src0, src1):
+ return reduce_expr.format(src0=src0, src1=src1)
+ def prereduce(src0, src1):
+ return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")"
+ src0 = prereduce("src0.x", "src1.x")
+ src1 = prereduce("src0.y", "src1.y")
+ src2 = prereduce("src0.z", "src1.z")
+ src3 = prereduce("src0.w", "src1.w")
+ opcode(name + "2", output_size, output_type,
+ [2, 2], [src_type, src_type], commutative,
+ final(reduce_(src0, src1)))
+ opcode(name + "3", output_size, output_type,
+ [3, 3], [src_type, src_type], commutative,
+ final(reduce_(reduce_(src0, src1), src2)))
+ opcode(name + "4", output_size, output_type,
+ [4, 4], [src_type, src_type], commutative,
+ final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+ binop("fadd", tfloat, commutative + associative, "src0 + src1")
+ binop("iadd", tint, commutative + associative, "src0 + src1")
+ binop("fsub", tfloat, "", "src0 - src1")
+ binop("isub", tint, "", "src0 - src1")
+
+ binop("fmul", tfloat, commutative + associative, "src0 * src1")
+ # low 32-bits of signed/unsigned integer multiply
+ binop("imul", tint, commutative + associative, "src0 * src1")
+ # high 32-bits of signed integer multiply
+ binop("imul_high", tint, commutative,
+ "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
+ # high 32-bits of unsigned integer multiply
+ binop("umul_high", tuint, commutative,
+ "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
+
+ binop("fdiv", tfloat, "", "src0 / src1")
+ binop("idiv", tint, "", "src0 / src1")
+ binop("udiv", tuint, "", "src0 / src1")
+
+ # returns a boolean representing the carry resulting from the addition of
+ # the two unsigned arguments.
+
+ binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0")
+
+ # returns a boolean representing the borrow resulting from the subtraction
+ # of the two unsigned arguments.
+
+ binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
+
+ binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
+
++# For signed integers, there are several different possible definitions of
++# "modulus" or "remainder". We follow the conventions used by LLVM and
++# SPIR-V. The irem opcode implements the standard C/C++ signed "%"
++# operation while the imod opcode implements the more mathematical
++# "modulus" operation. For details on the difference, see
++#
++# http://mathforum.org/library/drmath/view/52343.html
++
++binop("irem", tint, "", "src1 == 0 ? 0 : src0 % src1")
++binop("imod", tint, "",
++ "src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ?"
++ " src0 % src1 : src0 % src1 + src1)")
++binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
++binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)")
++
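A quick worked check of the irem/imod distinction described above (illustrative Python only; the const_expr strings themselves are C):

# For src0 = -7, src1 = 3: the C-style remainder truncates toward zero,
# while the mathematical modulus takes the sign of the divisor.
src0, src1 = -7, 3
c_rem = src0 - src1 * int(float(src0) / src1)   # -7 - 3*(-2) = -1  (irem)
math_mod = src0 - src1 * (src0 // src1)         # -7 - 3*(-3) =  2  (imod)
assert (c_rem, math_mod) == (-1, 2)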
+ #
+ # Comparisons
+ #
+
+
+ # these integer-aware comparisons return a boolean (0 or ~0)
+
+ binop_compare("flt", tfloat, "", "src0 < src1")
+ binop_compare("fge", tfloat, "", "src0 >= src1")
+ binop_compare("feq", tfloat, commutative, "src0 == src1")
+ binop_compare("fne", tfloat, commutative, "src0 != src1")
+ binop_compare("ilt", tint, "", "src0 < src1")
+ binop_compare("ige", tint, "", "src0 >= src1")
+ binop_compare("ieq", tint, commutative, "src0 == src1")
+ binop_compare("ine", tint, commutative, "src0 != src1")
+ binop_compare("ult", tuint, "", "src0 < src1")
+ binop_compare("uge", tuint, "", "src0 >= src1")
+
+ # integer-aware GLSL-style comparisons that compare floats and ints
+
+ binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+ binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+ binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+
+ # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
+
+ binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+ binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+
+ # These comparisons for integer-less hardware return 1.0 and 0.0 for true
+ # and false respectively
+
+ binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+ binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+ binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+
+
+ binop("ishl", tint, "", "src0 << src1")
+ binop("ishr", tint, "", "src0 >> src1")
+ binop("ushr", tuint, "", "src0 >> src1")
+
+ # bitwise logic operators
+ #
+ # These are also used as boolean and, or, xor for hardware supporting
+ # integers.
+
+
+ binop("iand", tuint, commutative + associative, "src0 & src1")
+ binop("ior", tuint, commutative + associative, "src0 | src1")
+ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
+
+
+ # floating point logic operators
+ #
+ # These use (src != 0.0) for testing the truth of the input, and output 1.0
+ # for true and 0.0 for false
+
+ binop("fand", tfloat, commutative,
+ "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+ binop("for", tfloat, commutative,
+ "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+ binop("fxor", tfloat, commutative,
+ "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+
+ binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
+ "{src}")
+
+ binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+ "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
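As a worked illustration of the reduce helpers (hedged, not part of the opcode table): expanding the binop_reduce call for fdot above, the two-component variant's const_expr comes out as the string below.

# What binop_reduce("fdot", ...) generates for the 2-wide opcode.
fdot2_expr = "((src0.x * src1.x) + (src0.y * src1.y))"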
+ opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+
+ binop("fmin", tfloat, "", "fminf(src0, src1)")
+ binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
+ binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1")
+ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
+ binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
+
+ # Saturated vector add for 4 8bit ints.
+ binop("usadd_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+ }
+ """)
+
+ # Saturated vector subtract for 4 8bit ints.
+ binop("ussub_4x8", tint, "", """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ if (src0_chan > src1_chan)
+ dst |= (src0_chan - src1_chan) << i;
+ }
+ """)
+
+ # vector min for 4 8bit ints.
+ binop("umin_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+ }
+ """)
+
+ # vector max for 4 8bit ints.
+ binop("umax_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+ }
+ """)
+
+ # unorm multiply: (a * b) / 255.
+ binop("umul_unorm_4x8", tint, commutative + associative, """
+ dst = 0;
+ for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ dst |= ((src0_chan * src1_chan) / 255) << i;
+ }
+ """)
+
+ binop("fpow", tfloat, "", "powf(src0, src1)")
+
+ binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
+ "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+
+ # bfm implements the behavior of the first operation of the SM5 "bfi" assembly
+ # and that of the "bfi1" i965 instruction. That is, it has undefined behavior
+ # if either of its arguments is 32.
+ binop_convert("bfm", tuint, tint, "", """
+ int bits = src0, offset = src1;
+ if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
+ dst = 0; /* undefined */
+ else
+ dst = ((1u << bits) - 1) << offset;
+ """)
+
+ opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
+ dst = ldexpf(src0, src1);
+ /* flush denormals to zero. */
+ if (!isnormal(dst))
+ dst = copysignf(0.0f, src0);
+ """)
+
+ # Combines the first component of each input to make a 2-component vector.
+
+ binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
+ dst.x = src0.x;
+ dst.y = src1.x;
+ """)
+
++# Byte extraction
++binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
++binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
++
++# Word extraction
++binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
++binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
++
++
+ def triop(name, ty, const_expr):
+ opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
+ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
+ opcode(name, output_size, tuint,
+ [src1_size, src2_size, src3_size],
+ [tuint, tuint, tuint], "", const_expr)
+
+ triop("ffma", tfloat, "src0 * src1 + src2")
+
+ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+
+ # Conditional Select
+ #
+ # A vector conditional select instruction (like ?:, but operating per-
+ # component on vectors). There are two versions, one for floating point
+ # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
+
+
+ triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+ opcode("bcsel", 0, tuint, [0, 0, 0],
+ [tbool, tuint, tuint], "", "src0 ? src1 : src2")
+
+ # SM5 bfi assembly
+ triop("bfi", tuint, """
+ unsigned mask = src0, insert = src1, base = src2;
+ if (mask == 0) {
+ dst = base;
+ } else {
+ unsigned tmp = mask;
+ while (!(tmp & 1)) {
+ tmp >>= 1;
+ insert <<= 1;
+ }
+ dst = (base & ~mask) | (insert & mask);
+ }
+ """)
+
+ # SM5 ubfe/ibfe assembly
+ opcode("ubfe", 0, tuint,
+ [0, 0, 0], [tuint, tint, tint], "", """
+ unsigned base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (bits < 0 || offset < 0) {
+ dst = 0; /* undefined */
+ } else if (offset + bits < 32) {
+ dst = (base << (32 - bits - offset)) >> (32 - bits);
+ } else {
+ dst = base >> offset;
+ }
+ """)
+ opcode("ibfe", 0, tint,
+ [0, 0, 0], [tint, tint, tint], "", """
+ int base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (bits < 0 || offset < 0) {
+ dst = 0; /* undefined */
+ } else if (offset + bits < 32) {
+ dst = (base << (32 - bits - offset)) >> (32 - bits);
+ } else {
+ dst = base >> offset;
+ }
+ """)
+
+ # GLSL bitfieldExtract()
+ opcode("ubitfield_extract", 0, tuint,
+ [0, 0, 0], [tuint, tint, tint], "", """
+ unsigned base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (bits < 0 || offset < 0 || offset + bits > 32) {
+ dst = 0; /* undefined per the spec */
+ } else {
+ dst = (base >> offset) & ((1ull << bits) - 1);
+ }
+ """)
+ opcode("ibitfield_extract", 0, tint,
+ [0, 0, 0], [tint, tint, tint], "", """
+ int base = src0;
+ int offset = src1, bits = src2;
+ if (bits == 0) {
+ dst = 0;
+ } else if (offset < 0 || bits < 0 || offset + bits > 32) {
+ dst = 0;
+ } else {
+ dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */
+ }
+ """)
+
+ # Combines the first component of each input to make a 3-component vector.
+
+ triop_horiz("vec3", 3, 1, 1, 1, """
+ dst.x = src0.x;
+ dst.y = src1.x;
+ dst.z = src2.x;
+ """)
+
+ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
+ src4_size, const_expr):
+ opcode(name, output_size, tuint,
+ [src1_size, src2_size, src3_size, src4_size],
+ [tuint, tuint, tuint, tuint],
+ "", const_expr)
+
+ opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
+ [tuint, tuint, tint, tint], "", """
+ unsigned base = src0, insert = src1;
+ int offset = src2, bits = src3;
+ if (bits == 0) {
+ dst = 0;
+ } else if (offset < 0 || bits < 0 || bits + offset > 32) {
+ dst = 0;
+ } else {
+ unsigned mask = ((1ull << bits) - 1) << offset;
+ dst = (base & ~mask) | ((insert << offset) & mask);
+ }
+ """)
+
+ quadop_horiz("vec4", 4, 1, 1, 1, 1, """
+ dst.x = src0.x;
+ dst.y = src1.x;
+ dst.z = src2.x;
+ dst.w = src3.x;
+ """)
+
+
--- /dev/null
+ #! /usr/bin/env python
++# -*- encoding: utf-8 -*-
+ #
+ # Copyright (C) 2014 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ #
+ # Authors:
+ # Jason Ekstrand (jason@jlekstrand.net)
+
+ import nir_algebraic
+
+ # Convenience variables
+ a = 'a'
+ b = 'b'
+ c = 'c'
+ d = 'd'
+
+ # Written in the form (<search>, <replace>) where <search> is an expression
+ # and <replace> is either an expression or a value. An expression is
+ # defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+ # where each source is either an expression or a value. A value can be
+ # either a numeric constant or a string representing a variable name.
+ #
+ # Variable names are specified as "[#]name[@type]" where "#" indicates that
+ # the given variable will only match constants and the type indicates that
+ # the given variable will only match values from ALU instructions with the
+ # given output type.
+ #
+ # For constants, you have to be careful to make sure that it is the right
+ # type because python is unaware of the source and destination types of the
+ # opcodes.
+
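As a hedged illustration of the syntax described above (not an entry in the list that follows): '#' marks a source that must be a constant and '@type' restricts a variable to values with that ALU result type, so a rule like the one below would replace a bcsel whose two branches are the same constant with that constant.

# Hypothetical rule, for illustration only.
example_rule = (('bcsel', 'a@bool', '#b', '#b'), 'b')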
+ optimizations = [
+ (('fneg', ('fneg', a)), a),
+ (('ineg', ('ineg', a)), a),
+ (('fabs', ('fabs', a)), ('fabs', a)),
+ (('fabs', ('fneg', a)), ('fabs', a)),
+ (('iabs', ('iabs', a)), ('iabs', a)),
+ (('iabs', ('ineg', a)), ('iabs', a)),
+ (('fadd', a, 0.0), a),
+ (('iadd', a, 0), a),
+ (('usadd_4x8', a, 0), a),
+ (('usadd_4x8', a, ~0), ~0),
+ (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+ (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
+ (('fadd', ('fneg', a), a), 0.0),
+ (('iadd', ('ineg', a), a), 0),
+ (('iadd', ('ineg', a), ('iadd', a, b)), b),
+ (('iadd', a, ('iadd', ('ineg', a), b)), b),
+ (('fadd', ('fneg', a), ('fadd', a, b)), b),
+ (('fadd', a, ('fadd', ('fneg', a), b)), b),
+ (('fmul', a, 0.0), 0.0),
+ (('imul', a, 0), 0),
+ (('umul_unorm_4x8', a, 0), 0),
+ (('umul_unorm_4x8', a, ~0), a),
+ (('fmul', a, 1.0), a),
+ (('imul', a, 1), a),
+ (('fmul', a, -1.0), ('fneg', a)),
+ (('imul', a, -1), ('ineg', a)),
++ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+ (('ffma', 0.0, a, b), b),
+ (('ffma', a, 0.0, b), b),
+ (('ffma', a, b, 0.0), ('fmul', a, b)),
+ (('ffma', a, 1.0, b), ('fadd', a, b)),
+ (('ffma', 1.0, a, b), ('fadd', a, b)),
+ (('flrp', a, b, 0.0), a),
+ (('flrp', a, b, 1.0), b),
+ (('flrp', a, a, b), a),
+ (('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+ (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ # Comparison simplifications
+ (('inot', ('flt', a, b)), ('fge', a, b)),
+ (('inot', ('fge', a, b)), ('flt', a, b)),
+ (('inot', ('feq', a, b)), ('fne', a, b)),
+ (('inot', ('fne', a, b)), ('feq', a, b)),
+ (('inot', ('ilt', a, b)), ('ige', a, b)),
+ (('inot', ('ige', a, b)), ('ilt', a, b)),
+ (('inot', ('ieq', a, b)), ('ine', a, b)),
+ (('inot', ('ine', a, b)), ('ieq', a, b)),
+ (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
+ (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+ (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
+ (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
+ (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
+ (('fmin', a, a), a),
+ (('fmax', a, a), a),
+ (('imin', a, a), a),
+ (('imax', a, a), a),
+ (('umin', a, a), a),
+ (('umax', a, a), a),
+ (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+ (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
+ (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
+ (('fsat', ('fsat', a)), ('fsat', a)),
+ (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+ (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+ (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ (('fne', ('fneg', a), a), ('fne', a, 0.0)),
+ (('feq', ('fneg', a), a), ('feq', a, 0.0)),
+ # Emulating booleans
+ (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
+ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+ (('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ # Comparison with the same args. Note that these are not done for
+ # the float versions because NaN always returns false on float
+ # inequalities.
+ (('ilt', a, a), False),
+ (('ige', a, a), True),
+ (('ieq', a, a), True),
+ (('ine', a, a), False),
+ (('ult', a, a), False),
+ (('uge', a, a), True),
+ # Logical and bit operations
+ (('fand', a, 0.0), 0.0),
+ (('iand', a, a), a),
+ (('iand', a, ~0), a),
+ (('iand', a, 0), 0),
+ (('ior', a, a), a),
+ (('ior', a, 0), a),
+ (('fxor', a, a), 0.0),
+ (('ixor', a, a), 0),
+ (('inot', ('inot', a)), a),
+ # DeMorgan's Laws
+ (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
+ (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
+ # Shift optimizations
+ (('ishl', 0, a), 0),
+ (('ishl', a, 0), a),
+ (('ishr', 0, a), 0),
+ (('ishr', a, 0), a),
+ (('ushr', 0, a), 0),
+ (('ushr', a, 0), a),
+ # Exponential/logarithmic identities
+ (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+ (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+ (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
+ (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+ (('fpow', a, 1.0), a),
+ (('fpow', a, 2.0), ('fmul', a, a)),
+ (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
+ (('fpow', 2.0, a), ('fexp2', a)),
+ (('fpow', ('fpow', a, 2.2), 0.454545), a),
+ (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ # Division and reciprocal
+ (('fdiv', 1.0, a), ('frcp', a)),
+ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+ (('frcp', ('frcp', a)), a),
+ (('frcp', ('fsqrt', a)), ('frsq', a)),
+ (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
+ (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+ # Boolean simplifications
+ (('ieq', 'a@bool', True), a),
+ (('ine', 'a@bool', True), ('inot', a)),
+ (('ine', 'a@bool', False), a),
+ (('ieq', 'a@bool', False), ('inot', 'a')),
+ (('bcsel', a, True, False), ('ine', a, 0)),
+ (('bcsel', a, False, True), ('ieq', a, 0)),
+ (('bcsel', True, b, c), b),
+ (('bcsel', False, b, c), c),
+ # The result of this should be hit by constant propagation and, in the
+ # next round of opt_algebraic, get picked up by one of the above two.
+ (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
+
+ (('bcsel', a, b, b), b),
+ (('fcsel', a, b, b), b),
+
+ # Conversions
+ (('i2b', ('b2i', a)), a),
+ (('f2i', ('ftrunc', a)), ('f2i', a)),
+ (('f2u', ('ftrunc', a)), ('f2u', a)),
+
+ # Subtracts
+ (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+ (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('ussub_4x8', a, 0), a),
+ (('ussub_4x8', a, ~0), 0),
+ (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
+ (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
+ (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
+ (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+ (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
+ (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
+ (('iabs', ('isub', 0, a)), ('iabs', a)),
+
+ # Misc. lowering
+ (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
++ (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
+ (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
+ (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
++ (('ldexp', 'x', 'exp'),
++ ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))),
+
+ (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
+ ('bcsel', ('ilt', 31, 'bits'), 'insert',
+ ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
+ 'options->lower_bitfield_insert'),
+
+ (('ibitfield_extract', 'value', 'offset', 'bits'),
+ ('bcsel', ('ilt', 31, 'bits'), 'value',
+ ('ibfe', 'value', 'offset', 'bits')),
+ 'options->lower_bitfield_extract'),
+
+ (('ubitfield_extract', 'value', 'offset', 'bits'),
+ ('bcsel', ('ult', 31, 'bits'), 'value',
+ ('ubfe', 'value', 'offset', 'bits')),
+ 'options->lower_bitfield_extract'),
++
++ (('extract_ibyte', a, b),
++ ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
++ 'options->lower_extract_byte'),
++
++ (('extract_ubyte', a, b),
++ ('iand', ('ushr', a, ('imul', b, 8)), 0xff),
++ 'options->lower_extract_byte'),
++
++ (('extract_iword', a, b),
++ ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
++ 'options->lower_extract_word'),
++
++ (('extract_uword', a, b),
++ ('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
++ 'options->lower_extract_word'),
++
++ (('pack_unorm_2x16', 'v'),
++ ('pack_uvec2_to_uint',
++ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
++ 'options->lower_pack_unorm_2x16'),
++
++ (('pack_unorm_4x8', 'v'),
++ ('pack_uvec4_to_uint',
++ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
++ 'options->lower_pack_unorm_4x8'),
++
++ (('pack_snorm_2x16', 'v'),
++ ('pack_uvec2_to_uint',
++ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
++ 'options->lower_pack_snorm_2x16'),
++
++ (('pack_snorm_4x8', 'v'),
++ ('pack_uvec4_to_uint',
++ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
++ 'options->lower_pack_snorm_4x8'),
++
++ (('unpack_unorm_2x16', 'v'),
++ ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
++ ('extract_uword', 'v', 1), 0, 0)),
++ 65535.0),
++ 'options->lower_unpack_unorm_2x16'),
++
++ (('unpack_unorm_4x8', 'v'),
++ ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
++ ('extract_ubyte', 'v', 1),
++ ('extract_ubyte', 'v', 2),
++ ('extract_ubyte', 'v', 3))),
++ 255.0),
++ 'options->lower_unpack_unorm_4x8'),
++
++ (('unpack_snorm_2x16', 'v'),
++ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
++ ('extract_iword', 'v', 1), 0, 0)),
++ 32767.0))),
++ 'options->lower_unpack_snorm_2x16'),
++
++ (('unpack_snorm_4x8', 'v'),
++ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
++ ('extract_ibyte', 'v', 1),
++ ('extract_ibyte', 'v', 2),
++ ('extract_ibyte', 'v', 3))),
++ 127.0))),
++ 'options->lower_unpack_snorm_4x8'),
+ ]
+
+ # Add optimizations to handle the case where the result of a ternary is
+ # compared to a constant. This way we can take things like
+ #
+ # (a ? 0 : 1) > 0
+ #
+ # and turn it into
+ #
+ # a ? (0 > 0) : (1 > 0)
+ #
+ # which constant folding will eat for lunch. The resulting ternary will
+ # further get cleaned up by the boolean reductions above and we will be
+ # left with just the original variable "a".
+ for op in ['flt', 'fge', 'feq', 'fne',
+ 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
+ optimizations += [
+ ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
+ ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
+ ((op, '#d', ('bcsel', a, '#b', '#c')),
+ ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
+ ]
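For instance, the 'flt' iteration of the loop above appends a pair of rules, the first of which is equivalent to:

# Generated for op == 'flt'; shown here only for illustration.
flt_generated = (('flt', ('bcsel', 'a', '#b', '#c'), '#d'),
                 ('bcsel', 'a', ('flt', 'b', 'd'), ('flt', 'c', 'd')))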
+
+ # This section contains "late" optimizations that should be run after the
+ # regular optimizations have finished. Optimizations should go here if
+ # they help code generation but do not necessarily produce code that is
+ # more easily optimizable.
+ late_optimizations = [
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+ (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+ (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+ (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
+ ]
+
+ print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+ print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir_phi_builder.h"
++#include "nir/nir_vla.h"
++
++struct nir_phi_builder {
++ nir_shader *shader;
++ nir_function_impl *impl;
++
++ /* Copied from the impl for easy access */
++ unsigned num_blocks;
++
++ /* Array of all blocks indexed by block->index. */
++ nir_block **blocks;
++
++ /* Hold on to the values so we can easily iterate over them. */
++ struct exec_list values;
++
++ /* Worklist for phi adding */
++ unsigned iter_count;
++ unsigned *work;
++ nir_block **W;
++};
++
++#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1)
++
++struct nir_phi_builder_value {
++ struct exec_node node;
++
++ struct nir_phi_builder *builder;
++
++ /* Needed so we can create phis and undefs */
++ unsigned num_components;
++
++ /* The list of phi nodes associated with this value. Phi nodes are not
++ * added directly. Instead, they are created, the instr->block pointer
++ * set, and then added to this list. Later, in phi_builder_finish, we
++ * set up their sources and add them to the top of their respective
++ * blocks.
++ */
++ struct exec_list phis;
++
++ /* Array of SSA defs, indexed by block. If a phi needs to be inserted
++ * in a given block, it will have the magic value NEEDS_PHI.
++ */
++ nir_ssa_def *defs[0];
++};
++
++static bool
++fill_block_array(nir_block *block, void *void_data)
++{
++ nir_block **blocks = void_data;
++ blocks[block->index] = block;
++ return true;
++}
++
++struct nir_phi_builder *
++nir_phi_builder_create(nir_function_impl *impl)
++{
++ struct nir_phi_builder *pb = ralloc(NULL, struct nir_phi_builder);
++
++ pb->shader = impl->function->shader;
++ pb->impl = impl;
++
++ assert(impl->valid_metadata & (nir_metadata_block_index |
++ nir_metadata_dominance));
++
++ pb->num_blocks = impl->num_blocks;
++ pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks);
++ nir_foreach_block(impl, fill_block_array, pb->blocks);
++
++ exec_list_make_empty(&pb->values);
++
++ pb->iter_count = 0;
++ pb->work = rzalloc_array(pb, unsigned, pb->num_blocks);
++ pb->W = ralloc_array(pb, nir_block *, pb->num_blocks);
++
++ return pb;
++}
++
++struct nir_phi_builder_value *
++nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
++ const BITSET_WORD *defs)
++{
++ struct nir_phi_builder_value *val;
++ unsigned i, w_start = 0, w_end = 0;
++
++ val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
++ val->builder = pb;
++ val->num_components = num_components;
++ exec_list_make_empty(&val->phis);
++ exec_list_push_tail(&pb->values, &val->node);
++
++ pb->iter_count++;
++
++ BITSET_WORD tmp;
++ BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) {
++ if (pb->work[i] < pb->iter_count)
++ pb->W[w_end++] = pb->blocks[i];
++ pb->work[i] = pb->iter_count;
++ }
++
++ while (w_start != w_end) {
++ nir_block *cur = pb->W[w_start++];
++ struct set_entry *dom_entry;
++ set_foreach(cur->dom_frontier, dom_entry) {
++ nir_block *next = (nir_block *) dom_entry->key;
++
++ /*
++ * If there's more than one return statement, then the end block
++ * can be a join point for some definitions. However, there are
++ * no instructions in the end block, so nothing would use those
++ * phi nodes. Of course, we couldn't place those phi nodes
++ * anyways due to the restriction of having no instructions in the
++ * end block...
++ */
++ if (next == pb->impl->end_block)
++ continue;
++
++ if (val->defs[next->index] == NULL) {
++ val->defs[next->index] = NEEDS_PHI;
++
++ if (pb->work[next->index] < pb->iter_count) {
++ pb->work[next->index] = pb->iter_count;
++ pb->W[w_end++] = next;
++ }
++ }
++ }
++ }
++
++ return val;
++}
++
++void
++nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
++ nir_block *block, nir_ssa_def *def)
++{
++ val->defs[block->index] = def;
++}
++
++nir_ssa_def *
++nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
++ nir_block *block)
++{
++ if (val->defs[block->index] == NULL) {
++ if (block->imm_dom) {
++ /* Grab it from our immediate dominator. We'll stash it here for
++ * easy access later.
++ */
++ val->defs[block->index] =
++ nir_phi_builder_value_get_block_def(val, block->imm_dom);
++ return val->defs[block->index];
++ } else {
++ /* No immediate dominator means that this block is either the
++ * start block or unreachable. In either case, the value is
++ * undefined so we need an SSA undef.
++ */
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(val->builder->shader,
++ val->num_components);
++ nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
++ &undef->instr);
++ val->defs[block->index] = &undef->def;
++ return &undef->def;
++ }
++ } else if (val->defs[block->index] == NEEDS_PHI) {
++ /* If we need a phi instruction, go ahead and create one but don't
++ * add it to the program yet. Later, in nir_phi_builder_finish, we'll
++ * set up the phi sources and add the instructions to the program.
++ */
++ nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
++ nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL);
++ phi->instr.block = block;
++ exec_list_push_tail(&val->phis, &phi->instr.node);
++ val->defs[block->index] = &phi->dest.ssa;
++ return &phi->dest.ssa;
++ } else {
++ return val->defs[block->index];
++ }
++}
++
++static int
++compare_blocks(const void *_a, const void *_b)
++{
++ nir_block * const * a = _a;
++ nir_block * const * b = _b;
++
++ return (*a)->index - (*b)->index;
++}
++
++void
++nir_phi_builder_finish(struct nir_phi_builder *pb)
++{
++ const unsigned num_blocks = pb->num_blocks;
++ NIR_VLA(nir_block *, preds, num_blocks);
++
++ foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
++ /* We can't iterate over the list of phis normally because we are
++ * removing them as we go and, in some cases, adding new phis as we
++ * build the source lists of others.
++ */
++ while (!exec_list_is_empty(&val->phis)) {
++ struct exec_node *head = exec_list_get_head(&val->phis);
++ nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node);
++ assert(phi->instr.type == nir_instr_type_phi);
++
++ exec_node_remove(&phi->instr.node);
++
++ /* Construct an array of predecessors. We sort it to ensure
++ * determinism in the phi insertion algorithm.
++ *
++ * XXX: Calling qsort this many times seems expensive.
++ */
++ int num_preds = 0;
++ struct set_entry *entry;
++ set_foreach(phi->instr.block->predecessors, entry)
++ preds[num_preds++] = (nir_block *)entry->key;
++ qsort(preds, num_preds, sizeof(*preds), compare_blocks);
++
++ for (unsigned i = 0; i < num_preds; i++) {
++ nir_phi_src *src = ralloc(phi, nir_phi_src);
++ src->pred = preds[i];
++ src->src = nir_src_for_ssa(
++ nir_phi_builder_value_get_block_def(val, preds[i]));
++ exec_list_push_tail(&phi->srcs, &src->node);
++ }
++
++ nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr);
++ }
++ }
++
++ ralloc_free(pb);
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#pragma once
++
++#include "nir.h"
++
++struct nir_phi_builder;
++struct nir_phi_builder_value;
++
++/* Create a new phi builder.
++ *
++ * While this is fairly cheap, it does allocate some memory and walk the list
++ * of blocks so it's recommended that you only call it once and use it to
++ * build phis for several values.
++ */
++struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl);
++
++/* Register a value with the builder.
++ *
++ * The 'defs' parameter specifies a bitset of blocks in which the given value
++ * is defined. This is used to determine where to place the phi nodes.
++ */
++struct nir_phi_builder_value *
++nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
++ const BITSET_WORD *defs);
++
++/* Register a definition for the given value and block.
++ *
++ * It is safe to call this function as many times as you wish for any given
++ * block/value pair. However, it always replaces whatever was there
++ * previously even if that definition is from a phi node. The phi builder
++ * always uses the latest information it has, so you must be careful about the
++ * order in which you register definitions. The final value at the end of the
++ * block must be the last value registered.
++ */
++void
++nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
++ nir_block *block, nir_ssa_def *def);
++
++/* Get the definition for the given value in the given block.
++ *
++ * This definition will always be the latest definition known for the given
++ * block. If no definition is immediately available, it will crawl up the
++ * dominance tree and insert phi nodes as needed until it finds one. In the
++ * case that no suitable definition is found, it will return the result of a
++ * nir_ssa_undef_instr with the correct number of components.
++ *
++ * Because this function only uses the latest available information for any
++ * given block, you must have already finished registering definitions for any
++ * blocks that dominate the current block in order to get the correct result.
++ */
++nir_ssa_def *
++nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
++ nir_block *block);
++
++/* Finish building phi nodes and free the builder.
++ *
++ * This function does far more than just free memory. Prior to calling
++ * nir_phi_builder_finish, no phi nodes have actually been inserted in the
++ * program. This function is what finishes setting up phi node sources and
++ * adds the phi nodes to the program.
++ */
++void nir_phi_builder_finish(struct nir_phi_builder *pb);
--- /dev/null
- "uniform ", "shader_storage", "system " };
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include "compiler/shader_enums.h"
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ static void
+ print_tabs(unsigned num_tabs, FILE *fp)
+ {
+ for (unsigned i = 0; i < num_tabs; i++)
+ fprintf(fp, "\t");
+ }
+
+ typedef struct {
+ FILE *fp;
+ nir_shader *shader;
+ /** map from nir_variable -> printable name */
+ struct hash_table *ht;
+
+ /** set of names used so far for nir_variables */
+ struct set *syms;
+
+ /* an index used to make new non-conflicting names */
+ unsigned index;
+ } print_state;
+
+ static void
+ print_register(nir_register *reg, print_state *state)
+ {
+ FILE *fp = state->fp;
+ if (reg->name != NULL)
+ fprintf(fp, "/* %s */ ", reg->name);
+ if (reg->is_global)
+ fprintf(fp, "gr%u", reg->index);
+ else
+ fprintf(fp, "r%u", reg->index);
+ }
+
+ static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+
+ static void
+ print_register_decl(nir_register *reg, print_state *state)
+ {
+ FILE *fp = state->fp;
+ fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
+ if (reg->is_packed)
+ fprintf(fp, "(packed) ");
+ print_register(reg, state);
+ if (reg->num_array_elems != 0)
+ fprintf(fp, "[%u]", reg->num_array_elems);
+ fprintf(fp, "\n");
+ }
+
+ static void
+ print_ssa_def(nir_ssa_def *def, print_state *state)
+ {
+ FILE *fp = state->fp;
+ if (def->name != NULL)
+ fprintf(fp, "/* %s */ ", def->name);
+ fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
+ }
+
+ static void
+ print_ssa_use(nir_ssa_def *def, print_state *state)
+ {
+ FILE *fp = state->fp;
+ if (def->name != NULL)
+ fprintf(fp, "/* %s */ ", def->name);
+ fprintf(fp, "ssa_%u", def->index);
+ }
+
+ static void print_src(nir_src *src, print_state *state);
+
+ static void
+ print_reg_src(nir_reg_src *src, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_register(src->reg, state);
+ if (src->reg->num_array_elems != 0) {
+ fprintf(fp, "[%u", src->base_offset);
+ if (src->indirect != NULL) {
+ fprintf(fp, " + ");
+ print_src(src->indirect, state);
+ }
+ fprintf(fp, "]");
+ }
+ }
+
+ static void
+ print_reg_dest(nir_reg_dest *dest, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_register(dest->reg, state);
+ if (dest->reg->num_array_elems != 0) {
+ fprintf(fp, "[%u", dest->base_offset);
+ if (dest->indirect != NULL) {
+ fprintf(fp, " + ");
+ print_src(dest->indirect, state);
+ }
+ fprintf(fp, "]");
+ }
+ }
+
+ static void
+ print_src(nir_src *src, print_state *state)
+ {
+ if (src->is_ssa)
+ print_ssa_use(src->ssa, state);
+ else
+ print_reg_src(&src->reg, state);
+ }
+
+ static void
+ print_dest(nir_dest *dest, print_state *state)
+ {
+ if (dest->is_ssa)
+ print_ssa_def(&dest->ssa, state);
+ else
+ print_reg_dest(&dest->reg, state);
+ }
+
+ static void
+ print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ if (instr->src[src].negate)
+ fprintf(fp, "-");
+ if (instr->src[src].abs)
+ fprintf(fp, "abs(");
+
+ print_src(&instr->src[src].src, state);
+
+ bool print_swizzle = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ if (instr->src[src].swizzle[i] != i) {
+ print_swizzle = true;
+ break;
+ }
+ }
+
+ if (print_swizzle) {
+ fprintf(fp, ".");
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+ }
+ }
+
+ if (instr->src[src].abs)
+ fprintf(fp, ")");
+ }
+
+ static void
+ print_alu_dest(nir_alu_dest *dest, print_state *state)
+ {
+ FILE *fp = state->fp;
+ /* we're going to print the saturate modifier later, after the opcode */
+
+ print_dest(&dest->dest, state);
+
+ if (!dest->dest.is_ssa &&
+ dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) {
+ fprintf(fp, ".");
+ for (unsigned i = 0; i < 4; i++)
+ if ((dest->write_mask >> i) & 1)
+ fprintf(fp, "%c", "xyzw"[i]);
+ }
+ }
+
+ static void
+ print_alu_instr(nir_alu_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ print_alu_dest(&instr->dest, state);
+
+ fprintf(fp, " = %s", nir_op_infos[instr->op].name);
+ if (instr->dest.saturate)
+ fprintf(fp, ".sat");
+ fprintf(fp, " ");
+
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_alu_src(instr, i, state);
+ }
+ }
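+
+ /*
+  * For reference, an ALU instruction printed by the routine above comes out
+  * roughly as (ssa names, opcode and swizzle are illustrative, not taken
+  * from a real shader):
+  *
+  *    vec4 ssa_5 = fadd ssa_3, ssa_4.xxyz
+  *
+  * i.e. destination, " = ", opcode (plus ".sat" when saturating), then the
+  * comma-separated sources with any negate/abs/swizzle modifiers.
+  */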
+
++static const char *
++get_var_name(nir_variable *var, print_state *state)
++{
++ if (state->ht == NULL)
++ return var->name;
++
++ assert(state->syms);
++
++ struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
++ if (entry)
++ return entry->data;
++
++ char *name;
++ if (var->name == NULL) {
++ name = ralloc_asprintf(state->syms, "@%u", state->index++);
++ } else {
++ struct set_entry *set_entry = _mesa_set_search(state->syms, var->name);
++ if (set_entry != NULL) {
++ /* we have a collision with another name, append an @ + a unique
++ * index */
++ name = ralloc_asprintf(state->syms, "%s@%u", var->name,
++ state->index++);
++ } else {
++ /* Mark this one as seen */
++ _mesa_set_add(state->syms, var->name);
++ name = var->name;
++ }
++ }
++
++ _mesa_hash_table_insert(state->ht, var, name);
++
++ return name;
++}
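++
++/* Illustrative example (names are hypothetical): two distinct variables
++ * both called "color" print as "color" and "color@0", and a nameless
++ * variable prints as "@1"; the numeric suffixes come from state->index.
++ */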
++
+ static void
+ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state)
+ {
+ FILE *fp = state->fp;
+ unsigned total_elems = glsl_get_components(type);
+ unsigned i;
+
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ for (i = 0; i < total_elems; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "0x%08x", c->value.u[i]);
+ }
+ break;
+
+ case GLSL_TYPE_FLOAT:
+ for (i = 0; i < total_elems; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "%f", c->value.f[i]);
+ }
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ for (i = 0; i < c->num_elements; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "{ ");
+ print_constant(c->elements[i], glsl_get_struct_field(type, i), state);
+ fprintf(fp, " }");
+ }
+ break;
+
+ case GLSL_TYPE_ARRAY:
+ for (i = 0; i < c->num_elements; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "{ ");
+ print_constant(c->elements[i], glsl_get_array_element(type), state);
+ fprintf(fp, " }");
+ }
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+ }
+
+ static void
+ print_var_decl(nir_variable *var, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "decl_var ");
+
+ const char *const cent = (var->data.centroid) ? "centroid " : "";
+ const char *const samp = (var->data.sample) ? "sample " : "";
+ const char *const patch = (var->data.patch) ? "patch " : "";
+ const char *const inv = (var->data.invariant) ? "invariant " : "";
+ const char *const mode[] = { "shader_in ", "shader_out ", "", "",
- struct set_entry *entry = NULL;
- if (state->syms)
- entry = _mesa_set_search(state->syms, var->name);
-
- char *name;
-
- if (entry != NULL) {
- /* we have a collision with another name, append an @ + a unique index */
- name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++);
- } else {
- name = var->name;
- }
-
- fprintf(fp, " %s", name);
++ "uniform ", "shader_storage ", "shared ",
++ "system "};
+
+ fprintf(fp, "%s%s%s%s%s%s ",
+ cent, samp, patch, inv, mode[var->data.mode],
+ glsl_interp_qualifier_name(var->data.interpolation));
+
+ glsl_print_type(var->type, fp);
+
-
- if (state->syms) {
- _mesa_set_add(state->syms, name);
- _mesa_hash_table_insert(state->ht, var, name);
- }
++ fprintf(fp, " %s", get_var_name(var, state));
+
+ if (var->data.mode == nir_var_shader_in ||
+ var->data.mode == nir_var_shader_out ||
+ var->data.mode == nir_var_uniform ||
+ var->data.mode == nir_var_shader_storage) {
+ const char *loc = NULL;
+ char buf[4];
+
+ switch (state->shader->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == nir_var_shader_in)
+ loc = gl_vert_attrib_name(var->data.location);
+ else if (var->data.mode == nir_var_shader_out)
+ loc = gl_varying_slot_name(var->data.location);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if ((var->data.mode == nir_var_shader_in) ||
+ (var->data.mode == nir_var_shader_out))
+ loc = gl_varying_slot_name(var->data.location);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == nir_var_shader_in)
+ loc = gl_varying_slot_name(var->data.location);
+ else if (var->data.mode == nir_var_shader_out)
+ loc = gl_frag_result_name(var->data.location);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_COMPUTE:
+ default:
+ /* TODO */
+ break;
+ }
+
+ if (!loc) {
+ snprintf(buf, sizeof(buf), "%u", var->data.location);
+ loc = buf;
+ }
+
+ fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
+ }
+
+ if (var->constant_initializer) {
+ fprintf(fp, " = { ");
+ print_constant(var->constant_initializer, var->type, state);
+ fprintf(fp, " }");
+ }
+
+ fprintf(fp, "\n");
- const char *name;
- if (state->ht) {
- struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
-
- assert(entry != NULL);
- name = entry->data;
- } else {
- name = var->name;
- }
-
- fprintf(fp, "%s", name);
+ }
+
+ static void
+ print_var(nir_variable *var, print_state *state)
+ {
+ FILE *fp = state->fp;
- fprintf(fp, "%u", instr->sampler_index);
++ fprintf(fp, "%s", get_var_name(var, state));
+ }
+
+ static void
+ print_deref_var(nir_deref_var *deref, print_state *state)
+ {
+ print_var(deref->var, state);
+ }
+
+ static void
+ print_deref_array(nir_deref_array *deref, print_state *state)
+ {
+ FILE *fp = state->fp;
+ fprintf(fp, "[");
+ switch (deref->deref_array_type) {
+ case nir_deref_array_type_direct:
+ fprintf(fp, "%u", deref->base_offset);
+ break;
+ case nir_deref_array_type_indirect:
+ if (deref->base_offset != 0)
+ fprintf(fp, "%u + ", deref->base_offset);
+ print_src(&deref->indirect, state);
+ break;
+ case nir_deref_array_type_wildcard:
+ fprintf(fp, "*");
+ break;
+ }
+ fprintf(fp, "]");
+ }
+
+ static void
+ print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
+ print_state *state)
+ {
+ FILE *fp = state->fp;
+ fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index));
+ }
+
+ static void
+ print_deref(nir_deref_var *deref, print_state *state)
+ {
+ nir_deref *tail = &deref->deref;
+ nir_deref *pretail = NULL;
+ while (tail != NULL) {
+ switch (tail->deref_type) {
+ case nir_deref_type_var:
+ assert(pretail == NULL);
+ assert(tail == &deref->deref);
+ print_deref_var(deref, state);
+ break;
+
+ case nir_deref_type_array:
+ assert(pretail != NULL);
+ print_deref_array(nir_deref_as_array(tail), state);
+ break;
+
+ case nir_deref_type_struct:
+ assert(pretail != NULL);
+ print_deref_struct(nir_deref_as_struct(tail),
+ pretail->type, state);
+ break;
+
+ default:
+ unreachable("Invalid deref type");
+ }
+
+ pretail = tail;
+ tail = pretail->child;
+ }
+ }
+
+ static void
+ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ FILE *fp = state->fp;
+
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+ print_dest(&instr->dest, state);
+ fprintf(fp, " = ");
+ }
+
+ fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
+
+ for (unsigned i = 0; i < num_srcs; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_src(&instr->src[i], state);
+ }
+
+ fprintf(fp, ") (");
+
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+
+ for (unsigned i = 0; i < num_vars; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_deref(instr->variables[i], state);
+ }
+
+ fprintf(fp, ") (");
+
+ unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;
+
+ for (unsigned i = 0; i < num_indices; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ fprintf(fp, "%d", instr->const_index[i]);
+ }
+
+ fprintf(fp, ")");
+
+ if (!state->shader)
+ return;
+
+ struct exec_list *var_list = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_uniform:
+ var_list = &state->shader->uniforms;
+ break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_per_vertex_input:
+ var_list = &state->shader->inputs;
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ var_list = &state->shader->outputs;
+ break;
+ default:
+ return;
+ }
+
+ nir_foreach_variable(var, var_list) {
+ if ((var->data.driver_location == instr->const_index[0]) &&
+ var->name) {
+ fprintf(fp, "\t/* %s */", var->name);
+ break;
+ }
+ }
+ }
+
+ static void
+ print_tex_instr(nir_tex_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ print_dest(&instr->dest, state);
+
+ fprintf(fp, " = ");
+
+ switch (instr->op) {
+ case nir_texop_tex:
+ fprintf(fp, "tex ");
+ break;
+ case nir_texop_txb:
+ fprintf(fp, "txb ");
+ break;
+ case nir_texop_txl:
+ fprintf(fp, "txl ");
+ break;
+ case nir_texop_txd:
+ fprintf(fp, "txd ");
+ break;
+ case nir_texop_txf:
+ fprintf(fp, "txf ");
+ break;
+ case nir_texop_txf_ms:
+ fprintf(fp, "txf_ms ");
+ break;
+ case nir_texop_txs:
+ fprintf(fp, "txs ");
+ break;
+ case nir_texop_lod:
+ fprintf(fp, "lod ");
+ break;
+ case nir_texop_tg4:
+ fprintf(fp, "tg4 ");
+ break;
+ case nir_texop_query_levels:
+ fprintf(fp, "query_levels ");
+ break;
+ case nir_texop_texture_samples:
+ fprintf(fp, "texture_samples ");
+ break;
+ case nir_texop_samples_identical:
+ fprintf(fp, "samples_identical ");
+ break;
+ default:
+ unreachable("Invalid texture operation");
+ break;
+ }
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ print_src(&instr->src[i].src, state);
+
+ fprintf(fp, " ");
+
+ switch(instr->src[i].src_type) {
+ case nir_tex_src_coord:
+ fprintf(fp, "(coord)");
+ break;
+ case nir_tex_src_projector:
+ fprintf(fp, "(projector)");
+ break;
+ case nir_tex_src_comparitor:
+ fprintf(fp, "(comparitor)");
+ break;
+ case nir_tex_src_offset:
+ fprintf(fp, "(offset)");
+ break;
+ case nir_tex_src_bias:
+ fprintf(fp, "(bias)");
+ break;
+ case nir_tex_src_lod:
+ fprintf(fp, "(lod)");
+ break;
+ case nir_tex_src_ms_index:
+ fprintf(fp, "(ms_index)");
+ break;
+ case nir_tex_src_ddx:
+ fprintf(fp, "(ddx)");
+ break;
+ case nir_tex_src_ddy:
+ fprintf(fp, "(ddy)");
+ break;
++ case nir_tex_src_texture_offset:
++ fprintf(fp, "(texture_offset)");
++ break;
+ case nir_tex_src_sampler_offset:
+ fprintf(fp, "(sampler_offset)");
+ break;
+
+ default:
+ unreachable("Invalid texture source type");
+ break;
+ }
+
+ fprintf(fp, ", ");
+ }
+
+ bool has_nonzero_offset = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (instr->const_offset[i] != 0) {
+ has_nonzero_offset = true;
+ break;
+ }
+ }
+
+ if (has_nonzero_offset) {
+ fprintf(fp, "[%i %i %i %i] (offset), ",
+ instr->const_offset[0], instr->const_offset[1],
+ instr->const_offset[2], instr->const_offset[3]);
+ }
+
+ if (instr->op == nir_texop_tg4) {
+ fprintf(fp, "%u (gather_component), ", instr->component);
+ }
+
++ if (instr->texture) {
++ assert(instr->sampler);
++ print_deref(instr->texture, state);
++ fprintf(fp, " (texture)");
++ }
+ if (instr->sampler) {
+ print_deref(instr->sampler, state);
++ fprintf(fp, " (sampler)");
+ } else {
-
- fprintf(fp, " (sampler)");
++ assert(instr->texture == NULL);
++ fprintf(fp, "%u (texture) %u (sampler)",
++ instr->texture_index, instr->sampler_index);
+ }
+ }
+
+ static void
+ print_call_instr(nir_call_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "call %s ", instr->callee->name);
+
+ for (unsigned i = 0; i < instr->num_params; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_deref(instr->params[i], state);
+ }
+
+ if (instr->return_deref != NULL) {
+ if (instr->num_params != 0)
+ fprintf(fp, ", ");
+ fprintf(fp, "returning ");
+ print_deref(instr->return_deref, state);
+ }
+ }
+
+ static void
+ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ print_ssa_def(&instr->def, state);
+
+ fprintf(fp, " = load_const (");
+
+ for (unsigned i = 0; i < instr->def.num_components; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ /*
+ * we don't really know the type of the constant (if it will be used as a
+ * float or an int), so just print the raw constant in hex for fidelity
+ * and then print the float in a comment for readability.
+ */
+
+ fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+ }
+
+ fprintf(fp, ")");
+ }
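+
+ /*
+  * Illustrative output for a one-component constant holding 1.0 (the ssa
+  * index is hypothetical): "vec1 ssa_0 = load_const (0x3f800000 ...)",
+  * where the elided part is the float value 1.000000 printed as an inline
+  * comment for readability.
+  */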
+
+ static void
+ print_jump_instr(nir_jump_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ switch (instr->type) {
+ case nir_jump_break:
+ fprintf(fp, "break");
+ break;
+
+ case nir_jump_continue:
+ fprintf(fp, "continue");
+ break;
+
+ case nir_jump_return:
+ fprintf(fp, "return");
+ break;
+ }
+ }
+
+ static void
+ print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_ssa_def(&instr->def, state);
+ fprintf(fp, " = undefined");
+ }
+
+ static void
+ print_phi_instr(nir_phi_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+ print_dest(&instr->dest, state);
+ fprintf(fp, " = phi ");
+ nir_foreach_phi_src(instr, src) {
+ if (&src->node != exec_list_get_head(&instr->srcs))
+ fprintf(fp, ", ");
+
+ fprintf(fp, "block_%u: ", src->pred->index);
+ print_src(&src->src, state);
+ }
+ }
+
+ static void
+ print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
+ {
+ FILE *fp = state->fp;
+ nir_foreach_parallel_copy_entry(instr, entry) {
+ if (&entry->node != exec_list_get_head(&instr->entries))
+ fprintf(fp, "; ");
+
+ print_dest(&entry->dest, state);
+ fprintf(fp, " = ");
+ print_src(&entry->src, state);
+ }
+ }
+
+ static void
+ print_instr(const nir_instr *instr, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+ print_tabs(tabs, fp);
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ print_alu_instr(nir_instr_as_alu(instr), state);
+ break;
+
+ case nir_instr_type_call:
+ print_call_instr(nir_instr_as_call(instr), state);
+ break;
+
+ case nir_instr_type_intrinsic:
+ print_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+ break;
+
+ case nir_instr_type_tex:
+ print_tex_instr(nir_instr_as_tex(instr), state);
+ break;
+
+ case nir_instr_type_load_const:
+ print_load_const_instr(nir_instr_as_load_const(instr), state);
+ break;
+
+ case nir_instr_type_jump:
+ print_jump_instr(nir_instr_as_jump(instr), state);
+ break;
+
+ case nir_instr_type_ssa_undef:
+ print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+ break;
+
+ case nir_instr_type_phi:
+ print_phi_instr(nir_instr_as_phi(instr), state);
+ break;
+
+ case nir_instr_type_parallel_copy:
+ print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state);
+ break;
+
+ default:
+ unreachable("Invalid instruction type");
+ break;
+ }
+ }
+
+ static int
+ compare_block_index(const void *p1, const void *p2)
+ {
+ const nir_block *block1 = *((const nir_block **) p1);
+ const nir_block *block2 = *((const nir_block **) p2);
+
+ return (int) block1->index - (int) block2->index;
+ }
+
+ static void print_cf_node(nir_cf_node *node, print_state *state,
+ unsigned tabs);
+
+ static void
+ print_block(nir_block *block, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "block block_%u:\n", block->index);
+
+ /* sort the predecessors by index so we consistently print the same thing */
+
+ nir_block **preds =
+ malloc(block->predecessors->entries * sizeof(nir_block *));
+
+ struct set_entry *entry;
+ unsigned i = 0;
+ set_foreach(block->predecessors, entry) {
+ preds[i++] = (nir_block *) entry->key;
+ }
+
+ qsort(preds, block->predecessors->entries, sizeof(nir_block *),
+ compare_block_index);
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "/* preds: ");
+ for (unsigned i = 0; i < block->predecessors->entries; i++) {
+ fprintf(fp, "block_%u ", preds[i]->index);
+ }
+ fprintf(fp, "*/\n");
+
+ free(preds);
+
+ nir_foreach_instr(block, instr) {
+ print_instr(instr, state, tabs);
+ fprintf(fp, "\n");
+ }
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "/* succs: ");
+ for (unsigned i = 0; i < 2; i++)
+ if (block->successors[i]) {
+ fprintf(fp, "block_%u ", block->successors[i]->index);
+ }
+ fprintf(fp, "*/\n");
+ }
+
+ static void
+ print_if(nir_if *if_stmt, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "if ");
+ print_src(&if_stmt->condition, state);
+ fprintf(fp, " {\n");
+ foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
+ print_cf_node(node, state, tabs + 1);
+ }
+ print_tabs(tabs, fp);
+ fprintf(fp, "} else {\n");
+ foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
+ print_cf_node(node, state, tabs + 1);
+ }
+ print_tabs(tabs, fp);
+ fprintf(fp, "}\n");
+ }
+
+ static void
+ print_loop(nir_loop *loop, print_state *state, unsigned tabs)
+ {
+ FILE *fp = state->fp;
+
+ print_tabs(tabs, fp);
+ fprintf(fp, "loop {\n");
+ foreach_list_typed(nir_cf_node, node, node, &loop->body) {
+ print_cf_node(node, state, tabs + 1);
+ }
+ print_tabs(tabs, fp);
+ fprintf(fp, "}\n");
+ }
+
+ static void
+ print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs)
+ {
+ switch (node->type) {
+ case nir_cf_node_block:
+ print_block(nir_cf_node_as_block(node), state, tabs);
+ break;
+
+ case nir_cf_node_if:
+ print_if(nir_cf_node_as_if(node), state, tabs);
+ break;
+
+ case nir_cf_node_loop:
+ print_loop(nir_cf_node_as_loop(node), state, tabs);
+ break;
+
+ default:
+ unreachable("Invalid CFG node type");
+ }
+ }
+
+ static void
+ print_function_impl(nir_function_impl *impl, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "\nimpl %s ", impl->function->name);
+
+ for (unsigned i = 0; i < impl->num_params; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ print_var(impl->params[i], state);
+ }
+
+ if (impl->return_var != NULL) {
+ if (impl->num_params != 0)
+ fprintf(fp, ", ");
+ fprintf(fp, "returning ");
+ print_var(impl->return_var, state);
+ }
+
+ fprintf(fp, "{\n");
+
+ nir_foreach_variable(var, &impl->locals) {
+ fprintf(fp, "\t");
+ print_var_decl(var, state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ fprintf(fp, "\t");
+ print_register_decl(reg, state);
+ }
+
+ nir_index_blocks(impl);
+
+ foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+ print_cf_node(node, state, 1);
+ }
+
+ fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
+ }
+
+ static void
+ print_function(nir_function *function, print_state *state)
+ {
+ FILE *fp = state->fp;
+
+ fprintf(fp, "decl_function %s ", function->name);
+
+ for (unsigned i = 0; i < function->num_params; i++) {
+ if (i != 0)
+ fprintf(fp, ", ");
+
+ switch (function->params[i].param_type) {
+ case nir_parameter_in:
+ fprintf(fp, "in ");
+ break;
+ case nir_parameter_out:
+ fprintf(fp, "out ");
+ break;
+ case nir_parameter_inout:
+ fprintf(fp, "inout ");
+ break;
+ default:
+ unreachable("Invalid parameter type");
+ }
+
+ glsl_print_type(function->params[i].type, fp);
+ }
+
+ if (function->return_type != NULL) {
+ if (function->num_params != 0)
+ fprintf(fp, ", ");
+ fprintf(fp, "returning ");
+ glsl_print_type(function->return_type, fp);
+ }
+
+ fprintf(fp, "\n");
+
+ if (function->impl != NULL) {
+ print_function_impl(function->impl, state);
+ return;
+ }
+ }
+
+ static void
+ init_print_state(print_state *state, nir_shader *shader, FILE *fp)
+ {
+ state->fp = fp;
+ state->shader = shader;
+ state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
+ _mesa_key_string_equal);
+ state->index = 0;
+ }
+
+ static void
+ destroy_print_state(print_state *state)
+ {
+ _mesa_hash_table_destroy(state->ht, NULL);
+ _mesa_set_destroy(state->syms, NULL);
+ }
+
+ void
+ nir_print_shader(nir_shader *shader, FILE *fp)
+ {
+ print_state state;
+ init_print_state(&state, shader, fp);
+
+ fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+
+ if (shader->info.name)
+ fprintf(fp, "name: %s\n", shader->info.name);
+
+ if (shader->info.label)
+ fprintf(fp, "label: %s\n", shader->info.label);
+
+ fprintf(fp, "inputs: %u\n", shader->num_inputs);
+ fprintf(fp, "outputs: %u\n", shader->num_outputs);
+ fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
++ fprintf(fp, "shared: %u\n", shader->num_shared);
+
+ nir_foreach_variable(var, &shader->uniforms) {
+ print_var_decl(var, &state);
+ }
+
+ nir_foreach_variable(var, &shader->inputs) {
+ print_var_decl(var, &state);
+ }
+
+ nir_foreach_variable(var, &shader->outputs) {
+ print_var_decl(var, &state);
+ }
+
++ nir_foreach_variable(var, &shader->shared) {
++ print_var_decl(var, &state);
++ }
++
+ nir_foreach_variable(var, &shader->globals) {
+ print_var_decl(var, &state);
+ }
+
+ nir_foreach_variable(var, &shader->system_values) {
+ print_var_decl(var, &state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ print_register_decl(reg, &state);
+ }
+
+ foreach_list_typed(nir_function, func, node, &shader->functions) {
+ print_function(func, &state);
+ }
+
+ destroy_print_state(&state);
+ }
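+
+ /*
+  * Typical (illustrative) use from a driver or a debugging session, assuming
+  * an existing nir_shader *shader:
+  *
+  *    nir_print_shader(shader, stderr);
+  */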
+
+ void
+ nir_print_instr(const nir_instr *instr, FILE *fp)
+ {
+ print_state state = {
+ .fp = fp,
+ };
+ print_instr(instr, &state, 0);
+
+ }
--- /dev/null
-nir_remove_dead_variables(nir_shader *shader)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+
+ static void
+ add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+ {
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++) {
+ nir_variable *var = instr->variables[i]->var;
+ _mesa_set_add(live, var);
+ }
+ }
+
+ static void
+ add_var_use_call(nir_call_instr *instr, struct set *live)
+ {
+ if (instr->return_deref != NULL) {
+ nir_variable *var = instr->return_deref->var;
+ _mesa_set_add(live, var);
+ }
+
+ for (unsigned i = 0; i < instr->num_params; i++) {
+ nir_variable *var = instr->params[i]->var;
+ _mesa_set_add(live, var);
+ }
+ }
+
+ static void
+ add_var_use_tex(nir_tex_instr *instr, struct set *live)
+ {
+ if (instr->sampler != NULL) {
+ nir_variable *var = instr->sampler->var;
+ _mesa_set_add(live, var);
+ }
+ }
+
+ static bool
+ add_var_use_block(nir_block *block, void *state)
+ {
+ struct set *live = state;
+
+ nir_foreach_instr(block, instr) {
+ switch(instr->type) {
+ case nir_instr_type_intrinsic:
+ add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live);
+ break;
+
+ case nir_instr_type_call:
+ add_var_use_call(nir_instr_as_call(instr), live);
+ break;
+
+ case nir_instr_type_tex:
+ add_var_use_tex(nir_instr_as_tex(instr), live);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ add_var_use_shader(nir_shader *shader, struct set *live)
+ {
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_foreach_block(function->impl, add_var_use_block, live);
+ }
+ }
+ }
+
+ static bool
+ remove_dead_vars(struct exec_list *var_list, struct set *live)
+ {
+ bool progress = false;
+
+ foreach_list_typed_safe(nir_variable, var, node, var_list) {
+ struct set_entry *entry = _mesa_set_search(live, var);
+ if (entry == NULL) {
+ exec_node_remove(&var->node);
+ ralloc_free(var);
+ progress = true;
+ }
+ }
+
+ return progress;
+ }
+
+ bool
- progress = remove_dead_vars(&shader->globals, live) || progress;
++nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode)
+ {
+ bool progress = false;
+ struct set *live =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+ add_var_use_shader(shader, live);
+
- nir_foreach_function(shader, function) {
- if (function->impl) {
- if (remove_dead_vars(&function->impl->locals, live)) {
- nir_metadata_preserve(function->impl, nir_metadata_block_index |
- nir_metadata_dominance |
- nir_metadata_live_ssa_defs);
- progress = true;
++ if (mode == nir_var_uniform || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->uniforms, live) || progress;
+
++ if (mode == nir_var_shader_in || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->inputs, live) || progress;
++
++ if (mode == nir_var_shader_out || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->outputs, live) || progress;
++
++ if (mode == nir_var_global || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->globals, live) || progress;
++
++ if (mode == nir_var_system_value || mode == nir_var_all)
++ progress = remove_dead_vars(&shader->system_values, live) || progress;
++
++ if (mode == nir_var_local || mode == nir_var_all) {
++ nir_foreach_function(shader, function) {
++ if (function->impl) {
++ if (remove_dead_vars(&function->impl->locals, live)) {
++ nir_metadata_preserve(function->impl, nir_metadata_block_index |
++ nir_metadata_dominance |
++ nir_metadata_live_ssa_defs);
++ progress = true;
++ }
+ }
+ }
+ }
+
+ _mesa_set_destroy(live, NULL);
+ return progress;
+ }
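+
+ /*
+  * Illustrative call sites, assuming an existing nir_shader *shader:
+  *
+  *    nir_remove_dead_variables(shader, nir_var_local);  // function locals only
+  *    nir_remove_dead_variables(shader, nir_var_all);    // every mode handled above
+  */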
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "nir.h"
++#include "nir_phi_builder.h"
++
++struct repair_ssa_state {
++ nir_function_impl *impl;
++
++ BITSET_WORD *def_set;
++ struct nir_phi_builder *phi_builder;
++
++ bool progress;
++};
++
++/* Get ready to build a phi and return the builder */
++static struct nir_phi_builder *
++prep_build_phi(struct repair_ssa_state *state)
++{
++ const unsigned num_words = BITSET_WORDS(state->impl->num_blocks);
++
++ /* We create the phi builder on-demand. */
++ if (state->phi_builder == NULL) {
++ state->phi_builder = nir_phi_builder_create(state->impl);
++ state->def_set = ralloc_array(NULL, BITSET_WORD, num_words);
++ }
++
++ /* We're going to build a phi. That's progress. */
++ state->progress = true;
++
++ /* Set the defs set to empty */
++ memset(state->def_set, 0, num_words * sizeof(*state->def_set));
++
++ return state->phi_builder;
++}
++
++static nir_block *
++get_src_block(nir_src *src)
++{
++ if (src->parent_instr->type == nir_instr_type_phi) {
++ return exec_node_data(nir_phi_src, src, src)->pred;
++ } else {
++ return src->parent_instr->block;
++ }
++}
++
++static bool
++repair_ssa_def(nir_ssa_def *def, void *void_state)
++{
++ struct repair_ssa_state *state = void_state;
++
++ bool is_valid = true;
++ nir_foreach_use(def, src) {
++ if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) {
++ is_valid = false;
++ break;
++ }
++ }
++
++ if (is_valid)
++ return true;
++
++ struct nir_phi_builder *pb = prep_build_phi(state);
++
++ BITSET_SET(state->def_set, def->parent_instr->block->index);
++
++ struct nir_phi_builder_value *val =
++ nir_phi_builder_add_value(pb, def->num_components, state->def_set);
++
++ nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);
++
++ nir_foreach_use_safe(def, src) {
++ nir_block *src_block = get_src_block(src);
++ if (!nir_block_dominates(def->parent_instr->block, src_block)) {
++ nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(
++ nir_phi_builder_value_get_block_def(val, src_block)));
++ }
++ }
++
++ return true;
++}
++
++static bool
++repair_ssa_block(nir_block *block, void *state)
++{
++ nir_foreach_instr_safe(block, instr) {
++ nir_foreach_ssa_def(instr, repair_ssa_def, state);
++ }
++
++ return true;
++}
++
++bool
++nir_repair_ssa_impl(nir_function_impl *impl)
++{
++ struct repair_ssa_state state;
++
++ state.impl = impl;
++ state.phi_builder = NULL;
++ state.progress = false;
++
++ nir_metadata_require(impl, nir_metadata_block_index |
++ nir_metadata_dominance);
++
++ nir_foreach_block(impl, repair_ssa_block, &state);
++
++ if (state.progress)
++ nir_metadata_preserve(impl, nir_metadata_block_index |
++ nir_metadata_dominance);
++
++ if (state.phi_builder) {
++ nir_phi_builder_finish(state.phi_builder);
++ ralloc_free(state.def_set);
++ }
++
++ return state.progress;
++}
++
++/** This pass can be used to repair SSA form in a shader.
++ *
++ * Sometimes a transformation (such as return lowering) will have to make
++ * changes to a shader which, while still correct, break some of NIR's SSA
++ * invariants. This pass will insert ssa_undefs and phi nodes as needed to
++ * get the shader back into SSA that the validator will like.
++ */
++bool
++nir_repair_ssa(nir_shader *shader)
++{
++ bool progress = false;
++
++ nir_foreach_function(shader, function) {
++ if (function->impl)
++ progress = nir_repair_ssa_impl(function->impl) || progress;
++ }
++
++ return progress;
++}
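++
++/* Illustrative use after a pass that may have broken dominance (the lowering
++ * pass named here is only an example of such a transformation):
++ *
++ *    nir_lower_returns(shader);
++ *    nir_repair_ssa(shader);
++ */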
--- /dev/null
+ /*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+
+ /**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on). However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
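+
+ /*
+  * A minimal usage sketch (illustrative; the compile/optimize steps stand in
+  * for whatever the driver actually runs):
+  *
+  *    nir_shader *nir = ...;        // compiled shader
+  *    // ... optimization and lowering passes ...
+  *    nir_sweep(nir);               // free everything no longer reachable
+  */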
+
+ #define steal_list(mem_ctx, type, list) \
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+ static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+ static bool
+ sweep_src_indirect(nir_src *src, void *nir)
+ {
+ if (!src->is_ssa && src->reg.indirect)
+ ralloc_steal(nir, src->reg.indirect);
+
+ return true;
+ }
+
+ static bool
+ sweep_dest_indirect(nir_dest *dest, void *nir)
+ {
+ if (!dest->is_ssa && dest->reg.indirect)
+ ralloc_steal(nir, dest->reg.indirect);
+
+ return true;
+ }
+
+ static void
+ sweep_block(nir_shader *nir, nir_block *block)
+ {
+ ralloc_steal(nir, block);
+
+ nir_foreach_instr(block, instr) {
+ ralloc_steal(nir, instr);
+
+ nir_foreach_src(instr, sweep_src_indirect, nir);
+ nir_foreach_dest(instr, sweep_dest_indirect, nir);
+ }
+ }
+
+ static void
+ sweep_if(nir_shader *nir, nir_if *iff)
+ {
+ ralloc_steal(nir, iff);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+ }
+
+ static void
+ sweep_loop(nir_shader *nir, nir_loop *loop)
+ {
+ ralloc_steal(nir, loop);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+ }
+
+ static void
+ sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+ {
+ switch (cf_node->type) {
+ case nir_cf_node_block:
+ sweep_block(nir, nir_cf_node_as_block(cf_node));
+ break;
+ case nir_cf_node_if:
+ sweep_if(nir, nir_cf_node_as_if(cf_node));
+ break;
+ case nir_cf_node_loop:
+ sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+ break;
+ default:
+ unreachable("Invalid CF node type");
+ }
+ }
+
+ static void
+ sweep_impl(nir_shader *nir, nir_function_impl *impl)
+ {
+ ralloc_steal(nir, impl);
+
+ ralloc_steal(nir, impl->params);
+ ralloc_steal(nir, impl->return_var);
+ steal_list(nir, nir_variable, &impl->locals);
+ steal_list(nir, nir_register, &impl->registers);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ sweep_block(nir, impl->end_block);
+
+ /* Wipe out all the metadata, if any. */
+ nir_metadata_preserve(impl, nir_metadata_none);
+ }
+
+ static void
+ sweep_function(nir_shader *nir, nir_function *f)
+ {
+ ralloc_steal(nir, f);
+ ralloc_steal(nir, f->params);
+
+ if (f->impl)
+ sweep_impl(nir, f->impl);
+ }
+
+ void
+ nir_sweep(nir_shader *nir)
+ {
+ void *rubbish = ralloc_context(NULL);
+
+ /* First, move ownership of all the memory to a temporary context; assume dead. */
+ ralloc_adopt(rubbish, nir);
+
+ ralloc_steal(nir, (char *)nir->info.name);
+ if (nir->info.label)
+ ralloc_steal(nir, (char *)nir->info.label);
+
+ /* Variables and registers are not dead. Steal them back. */
+ steal_list(nir, nir_variable, &nir->uniforms);
+ steal_list(nir, nir_variable, &nir->inputs);
+ steal_list(nir, nir_variable, &nir->outputs);
++ steal_list(nir, nir_variable, &nir->shared);
+ steal_list(nir, nir_variable, &nir->globals);
+ steal_list(nir, nir_variable, &nir->system_values);
+ steal_list(nir, nir_register, &nir->registers);
+
+ /* Recurse into functions, stealing their contents back. */
+ foreach_list_typed(nir_function, func, node, &nir->functions) {
+ sweep_function(nir, func);
+ }
+
+ /* Free everything we didn't steal back. */
+ ralloc_free(rubbish);
+ }
--- /dev/null
- if (instr->return_deref == NULL)
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir.h"
+ #include <assert.h>
+
+ /*
+ * This file checks for invalid IR indicating a bug somewhere in the compiler.
+ */
+
+ /* Since this file is just a pile of asserts, don't bother compiling it if
+ * we're not building a debug build.
+ */
+ #ifdef DEBUG
+
+ /*
+ * Per-register validation state.
+ */
+
+ typedef struct {
+ /*
+ * equivalent to the uses and defs in nir_register, but built up by the
+ * validator. At the end, we verify that the sets have the same entries.
+ */
+ struct set *uses, *if_uses, *defs;
+ nir_function_impl *where_defined; /* NULL for global registers */
+ } reg_validate_state;
+
+ typedef struct {
+ /*
+ * equivalent to the uses in nir_ssa_def, but built up by the validator.
+ * At the end, we verify that the sets have the same entries.
+ */
+ struct set *uses, *if_uses;
+ nir_function_impl *where_defined;
+ } ssa_def_validate_state;
+
+ typedef struct {
+ /* map of register -> validation state (struct above) */
+ struct hash_table *regs;
+
+ /* the current shader being validated */
+ nir_shader *shader;
+
+ /* the current instruction being validated */
+ nir_instr *instr;
+
+ /* the current basic block being validated */
+ nir_block *block;
+
+ /* the current if statement being validated */
+ nir_if *if_stmt;
+
+ /* the current loop being visited */
+ nir_loop *loop;
+
+ /* the parent of the current cf node being visited */
+ nir_cf_node *parent_node;
+
+ /* the current function implementation being validated */
+ nir_function_impl *impl;
+
+ /* map of SSA value -> function implementation where it is defined */
+ struct hash_table *ssa_defs;
+
+ /* bitset of ssa definitions we have found; used to check uniqueness */
+ BITSET_WORD *ssa_defs_found;
+
+ /* bitset of registers we have currently found; used to check uniqueness */
+ BITSET_WORD *regs_found;
+
+ /* map of local variable -> function implementation where it is defined */
+ struct hash_table *var_defs;
+ } validate_state;
+
+ static void validate_src(nir_src *src, validate_state *state);
+
+ static void
+ validate_reg_src(nir_src *src, validate_state *state)
+ {
+ assert(src->reg.reg != NULL);
+
+ struct hash_entry *entry;
+ entry = _mesa_hash_table_search(state->regs, src->reg.reg);
+ assert(entry);
+
+ reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+ if (state->instr) {
+ _mesa_set_add(reg_state->uses, src);
+ } else {
+ assert(state->if_stmt);
+ _mesa_set_add(reg_state->if_uses, src);
+ }
+
+ if (!src->reg.reg->is_global) {
+ assert(reg_state->where_defined == state->impl &&
+ "using a register declared in a different function");
+ }
+
+ assert((src->reg.reg->num_array_elems == 0 ||
+ src->reg.base_offset < src->reg.reg->num_array_elems) &&
+ "definitely out-of-bounds array access");
+
+ if (src->reg.indirect) {
+ assert(src->reg.reg->num_array_elems != 0);
+ assert((src->reg.indirect->is_ssa ||
+ src->reg.indirect->reg.indirect == NULL) &&
+ "only one level of indirection allowed");
+ validate_src(src->reg.indirect, state);
+ }
+ }
+
+ static void
+ validate_ssa_src(nir_src *src, validate_state *state)
+ {
+ assert(src->ssa != NULL);
+
+ struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa);
+
+ assert(entry);
+
+ ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+ assert(def_state->where_defined == state->impl &&
+ "using an SSA value defined in a different function");
+
+ if (state->instr) {
+ _mesa_set_add(def_state->uses, src);
+ } else {
+ assert(state->if_stmt);
+ _mesa_set_add(def_state->if_uses, src);
+ }
+
+ /* TODO validate that the use is dominated by the definition */
+ }
+
+ static void
+ validate_src(nir_src *src, validate_state *state)
+ {
+ if (state->instr)
+ assert(src->parent_instr == state->instr);
+ else
+ assert(src->parent_if == state->if_stmt);
+
+ if (src->is_ssa)
+ validate_ssa_src(src, state);
+ else
+ validate_reg_src(src, state);
+ }
+
+ static void
+ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
+ {
+ nir_alu_src *src = &instr->src[index];
+
+ unsigned num_components;
+ if (src->src.is_ssa)
+ num_components = src->src.ssa->num_components;
+ else {
+ if (src->src.reg.reg->is_packed)
+ num_components = 4; /* can't check anything */
+ else
+ num_components = src->src.reg.reg->num_components;
+ }
+ for (unsigned i = 0; i < 4; i++) {
+ assert(src->swizzle[i] < 4);
+
+ if (nir_alu_instr_channel_used(instr, index, i))
+ assert(src->swizzle[i] < num_components);
+ }
+
+ validate_src(&src->src, state);
+ }
+
+ static void
+ validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+ {
+ assert(dest->reg != NULL);
+
+ assert(dest->parent_instr == state->instr);
+
+ struct hash_entry *entry2;
+ entry2 = _mesa_hash_table_search(state->regs, dest->reg);
+
+ assert(entry2);
+
+ reg_validate_state *reg_state = (reg_validate_state *) entry2->data;
+ _mesa_set_add(reg_state->defs, dest);
+
+ if (!dest->reg->is_global) {
+ assert(reg_state->where_defined == state->impl &&
+ "writing to a register declared in a different function");
+ }
+
+ assert((dest->reg->num_array_elems == 0 ||
+ dest->base_offset < dest->reg->num_array_elems) &&
+ "definitely out-of-bounds array access");
+
+ if (dest->indirect) {
+ assert(dest->reg->num_array_elems != 0);
+ assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) &&
+ "only one level of indirection allowed");
+ validate_src(dest->indirect, state);
+ }
+ }
+
+ static void
+ validate_ssa_def(nir_ssa_def *def, validate_state *state)
+ {
+ assert(def->index < state->impl->ssa_alloc);
+ assert(!BITSET_TEST(state->ssa_defs_found, def->index));
+ BITSET_SET(state->ssa_defs_found, def->index);
+
+ assert(def->parent_instr == state->instr);
+
+ assert(def->num_components <= 4);
+
+ list_validate(&def->uses);
+ list_validate(&def->if_uses);
+
+ ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
+ ssa_def_validate_state);
+ def_state->where_defined = state->impl;
+ def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ _mesa_hash_table_insert(state->ssa_defs, def, def_state);
+ }
+
+ static void
+ validate_dest(nir_dest *dest, validate_state *state)
+ {
+ if (dest->is_ssa)
+ validate_ssa_def(&dest->ssa, state);
+ else
+ validate_reg_dest(&dest->reg, state);
+ }
+
+ static void
+ validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+ {
+ unsigned dest_size =
+ dest->dest.is_ssa ? dest->dest.ssa.num_components
+ : dest->dest.reg.reg->num_components;
+ bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed;
+ /*
+ * validate that the instruction doesn't write to components not in the
+ * register/SSA value
+ */
+ assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1)));
+
+ /* validate that saturate is only ever used on instructions with
+ * destinations of type float
+ */
+ nir_alu_instr *alu = nir_instr_as_alu(state->instr);
+ assert(nir_op_infos[alu->op].output_type == nir_type_float ||
+ !dest->saturate);
+
+ validate_dest(&dest->dest, state);
+ }
+
+ static void
+ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
+ {
+ assert(instr->op < nir_num_opcodes);
+
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ validate_alu_src(instr, i, state);
+ }
+
+ validate_alu_dest(&instr->dest, state);
+ }
+
+ static void
+ validate_deref_chain(nir_deref *deref, validate_state *state)
+ {
+ assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
+ nir_deref *parent = NULL;
+ while (deref != NULL) {
+ switch (deref->deref_type) {
+ case nir_deref_type_array:
+ assert(deref->type == glsl_get_array_element(parent->type));
+ if (nir_deref_as_array(deref)->deref_array_type ==
+ nir_deref_array_type_indirect)
+ validate_src(&nir_deref_as_array(deref)->indirect, state);
+ break;
+
+ case nir_deref_type_struct:
+ assert(deref->type ==
+ glsl_get_struct_field(parent->type,
+ nir_deref_as_struct(deref)->index));
+ break;
+
+ case nir_deref_type_var:
+ break;
+
+ default:
+ assert(!"Invalid deref type");
+ break;
+ }
+
+ parent = deref;
+ deref = deref->child;
+ }
+ }
+
+ static void
+ validate_var_use(nir_variable *var, validate_state *state)
+ {
+ if (var->data.mode == nir_var_local) {
+ struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
+
+ assert(entry);
+ assert((nir_function_impl *) entry->data == state->impl);
+ }
+ }
+
+ static void
+ validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
+ {
+ assert(deref != NULL);
+ assert(ralloc_parent(deref) == parent_mem_ctx);
+ assert(deref->deref.type == deref->var->type);
+
+ validate_var_use(deref->var, state);
+
+ validate_deref_chain(&deref->deref, state);
+ }
+
+ static void
+ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
+ {
+ unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ for (unsigned i = 0; i < num_srcs; i++) {
+ unsigned components_read =
+ nir_intrinsic_infos[instr->intrinsic].src_components[i];
+ if (components_read == 0)
+ components_read = instr->num_components;
+
+ assert(components_read > 0);
+
+ if (instr->src[i].is_ssa) {
+ assert(components_read <= instr->src[i].ssa->num_components);
+ } else if (!instr->src[i].reg.reg->is_packed) {
+ assert(components_read <= instr->src[i].reg.reg->num_components);
+ }
+
+ validate_src(&instr->src[i], state);
+ }
+
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++) {
+ validate_deref_var(instr, instr->variables[i], state);
+ }
+
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+ unsigned components_written =
+ nir_intrinsic_infos[instr->intrinsic].dest_components;
+ if (components_written == 0)
+ components_written = instr->num_components;
+
+ assert(components_written > 0);
+
+ if (instr->dest.is_ssa) {
+ assert(components_written <= instr->dest.ssa.num_components);
+ } else if (!instr->dest.reg.reg->is_packed) {
+ assert(components_written <= instr->dest.reg.reg->num_components);
+ }
+
+ validate_dest(&instr->dest, state);
+ }
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_var: {
+ const struct glsl_type *type =
+ nir_deref_tail(&instr->variables[0]->deref)->type;
+ assert(glsl_type_is_vector_or_scalar(type) ||
+ (instr->variables[0]->var->data.mode == nir_var_uniform &&
+ glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
+ assert(instr->num_components == glsl_get_vector_elements(type));
+ break;
+ }
+ case nir_intrinsic_store_var: {
+ const struct glsl_type *type =
+ nir_deref_tail(&instr->variables[0]->deref)->type;
+ assert(glsl_type_is_vector_or_scalar(type) ||
+ (instr->variables[0]->var->data.mode == nir_var_uniform &&
+ glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
+ assert(instr->num_components == glsl_get_vector_elements(type));
+ assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+ instr->variables[0]->var->data.mode != nir_var_uniform &&
+ instr->variables[0]->var->data.mode != nir_var_shader_storage);
+ assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
+ break;
+ }
+ case nir_intrinsic_copy_var:
+ assert(nir_deref_tail(&instr->variables[0]->deref)->type ==
+ nir_deref_tail(&instr->variables[1]->deref)->type);
+ assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+ instr->variables[0]->var->data.mode != nir_var_uniform &&
+ instr->variables[0]->var->data.mode != nir_var_shader_storage);
+ break;
+ default:
+ break;
+ }
+ }
+
+ static void
+ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
+ {
+ bool src_type_seen[nir_num_tex_src_types];
+ for (unsigned i = 0; i < nir_num_tex_src_types; i++)
+ src_type_seen[i] = false;
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ assert(!src_type_seen[instr->src[i].src_type]);
+ src_type_seen[instr->src[i].src_type] = true;
+ validate_src(&instr->src[i].src, state);
+ }
+
+ if (instr->sampler != NULL)
+ validate_deref_var(instr, instr->sampler, state);
+
+ validate_dest(&instr->dest, state);
+ }
+
+ static void
+ validate_call_instr(nir_call_instr *instr, validate_state *state)
+ {
- else
++ if (instr->return_deref == NULL) {
+ assert(glsl_type_is_void(instr->callee->return_type));
-
- validate_deref_var(instr, instr->return_deref, state);
++ } else {
+ assert(instr->return_deref->deref.type == instr->callee->return_type);
++ validate_deref_var(instr, instr->return_deref, state);
++ }
+
+ assert(instr->num_params == instr->callee->num_params);
+
+ for (unsigned i = 0; i < instr->num_params; i++) {
+ assert(instr->callee->params[i].type == instr->params[i]->deref.type);
+ validate_deref_var(instr, instr->params[i], state);
+ }
+ }
+
+ static void
+ validate_load_const_instr(nir_load_const_instr *instr, validate_state *state)
+ {
+ validate_ssa_def(&instr->def, state);
+ }
+
+ static void
+ validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state)
+ {
+ validate_ssa_def(&instr->def, state);
+ }
+
+ static void
+ validate_phi_instr(nir_phi_instr *instr, validate_state *state)
+ {
+ /*
+ * don't validate the sources until we get to them from their predecessor
+ * basic blocks, to avoid validating an SSA use before its definition.
+ */
+
+ validate_dest(&instr->dest, state);
+
+ exec_list_validate(&instr->srcs);
+ assert(exec_list_length(&instr->srcs) ==
+ state->block->predecessors->entries);
+ }
+
+ static void
+ validate_instr(nir_instr *instr, validate_state *state)
+ {
+ assert(instr->block == state->block);
+
+ state->instr = instr;
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ validate_alu_instr(nir_instr_as_alu(instr), state);
+ break;
+
+ case nir_instr_type_call:
+ validate_call_instr(nir_instr_as_call(instr), state);
+ break;
+
+ case nir_instr_type_intrinsic:
+ validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+ break;
+
+ case nir_instr_type_tex:
+ validate_tex_instr(nir_instr_as_tex(instr), state);
+ break;
+
+ case nir_instr_type_load_const:
+ validate_load_const_instr(nir_instr_as_load_const(instr), state);
+ break;
+
+ case nir_instr_type_phi:
+ validate_phi_instr(nir_instr_as_phi(instr), state);
+ break;
+
+ case nir_instr_type_ssa_undef:
+ validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+ break;
+
+ case nir_instr_type_jump:
+ break;
+
+ default:
+ assert(!"Invalid ALU instruction type");
+ break;
+ }
+
+ state->instr = NULL;
+ }
+
+ static void
+ validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state)
+ {
+ state->instr = &instr->instr;
+
+ assert(instr->dest.is_ssa);
+
+ exec_list_validate(&instr->srcs);
+ nir_foreach_phi_src(instr, src) {
+ if (src->pred == pred) {
+ assert(src->src.is_ssa);
+ assert(src->src.ssa->num_components ==
+ instr->dest.ssa.num_components);
+
+ validate_src(&src->src, state);
+ state->instr = NULL;
+ return;
+ }
+ }
+
+ abort();
+ }
+
+ static void
+ validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state)
+ {
+ nir_foreach_instr(succ, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ validate_phi_src(nir_instr_as_phi(instr), block, state);
+ }
+ }
+
+ static void validate_cf_node(nir_cf_node *node, validate_state *state);
+
+ static void
+ validate_block(nir_block *block, validate_state *state)
+ {
+ assert(block->cf_node.parent == state->parent_node);
+
+ state->block = block;
+
+ exec_list_validate(&block->instr_list);
+ nir_foreach_instr(block, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ assert(instr == nir_block_first_instr(block) ||
+ nir_instr_prev(instr)->type == nir_instr_type_phi);
+ }
+
+ if (instr->type == nir_instr_type_jump) {
+ assert(instr == nir_block_last_instr(block));
+ }
+
+ validate_instr(instr, state);
+ }
+
+ assert(block->successors[0] != NULL);
+ assert(block->successors[0] != block->successors[1]);
+
+ for (unsigned i = 0; i < 2; i++) {
+ if (block->successors[i] != NULL) {
+ struct set_entry *entry =
+ _mesa_set_search(block->successors[i]->predecessors, block);
+ assert(entry);
+
+ validate_phi_srcs(block, block->successors[i], state);
+ }
+ }
+
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ const nir_block *pred = entry->key;
+ assert(pred->successors[0] == block ||
+ pred->successors[1] == block);
+ }
+
+ if (!exec_list_is_empty(&block->instr_list) &&
+ nir_block_last_instr(block)->type == nir_instr_type_jump) {
+ assert(block->successors[1] == NULL);
+ nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block));
+ switch (jump->type) {
+ case nir_jump_break: {
+ nir_block *after =
+ nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node));
+ assert(block->successors[0] == after);
+ break;
+ }
+
+ case nir_jump_continue: {
+ nir_block *first =
+ nir_cf_node_as_block(nir_loop_first_cf_node(state->loop));
+ assert(block->successors[0] == first);
+ break;
+ }
+
+ case nir_jump_return:
+ assert(block->successors[0] == state->impl->end_block);
+ break;
+
+ default:
+ unreachable("bad jump type");
+ }
+ } else {
+ nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+ if (next == NULL) {
+ switch (state->parent_node->type) {
+ case nir_cf_node_loop: {
+ nir_block *first =
+ nir_cf_node_as_block(nir_loop_first_cf_node(state->loop));
+ assert(block->successors[0] == first);
+ /* due to the hack for infinite loops, block->successors[1] may
+ * point to the block after the loop.
+ */
+ break;
+ }
+
+ case nir_cf_node_if: {
+ nir_block *after =
+ nir_cf_node_as_block(nir_cf_node_next(state->parent_node));
+ assert(block->successors[0] == after);
+ assert(block->successors[1] == NULL);
+ break;
+ }
+
+ case nir_cf_node_function:
+ assert(block->successors[0] == state->impl->end_block);
+ assert(block->successors[1] == NULL);
+ break;
+
+ default:
+ unreachable("unknown control flow node type");
+ }
+ } else {
+ if (next->type == nir_cf_node_if) {
+ nir_if *if_stmt = nir_cf_node_as_if(next);
+ assert(&block->successors[0]->cf_node ==
+ nir_if_first_then_node(if_stmt));
+ assert(&block->successors[1]->cf_node ==
+ nir_if_first_else_node(if_stmt));
+ } else {
+ assert(next->type == nir_cf_node_loop);
+ nir_loop *loop = nir_cf_node_as_loop(next);
+ assert(&block->successors[0]->cf_node ==
+ nir_loop_first_cf_node(loop));
+ assert(block->successors[1] == NULL);
+ }
+ }
+ }
+ }
+
+ static void
+ validate_if(nir_if *if_stmt, validate_state *state)
+ {
+ state->if_stmt = if_stmt;
+
+ assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev));
+ nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
+ assert(prev_node->type == nir_cf_node_block);
+
+ assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next));
+ nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node);
+ assert(next_node->type == nir_cf_node_block);
+
+ validate_src(&if_stmt->condition, state);
+
+ assert(!exec_list_is_empty(&if_stmt->then_list));
+ assert(!exec_list_is_empty(&if_stmt->else_list));
+
+ nir_cf_node *old_parent = state->parent_node;
+ state->parent_node = &if_stmt->cf_node;
+
+ exec_list_validate(&if_stmt->then_list);
+ foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) {
+ validate_cf_node(cf_node, state);
+ }
+
+ exec_list_validate(&if_stmt->else_list);
+ foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) {
+ validate_cf_node(cf_node, state);
+ }
+
+ state->parent_node = old_parent;
+ state->if_stmt = NULL;
+ }
+
+ static void
+ validate_loop(nir_loop *loop, validate_state *state)
+ {
+ assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev));
+ nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node);
+ assert(prev_node->type == nir_cf_node_block);
+
+ assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next));
+ nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node);
+ assert(next_node->type == nir_cf_node_block);
+
+ assert(!exec_list_is_empty(&loop->body));
+
+ nir_cf_node *old_parent = state->parent_node;
+ state->parent_node = &loop->cf_node;
+ nir_loop *old_loop = state->loop;
+ state->loop = loop;
+
+ exec_list_validate(&loop->body);
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ validate_cf_node(cf_node, state);
+ }
+
+ state->parent_node = old_parent;
+ state->loop = old_loop;
+ }
+
+ static void
+ validate_cf_node(nir_cf_node *node, validate_state *state)
+ {
+ assert(node->parent == state->parent_node);
+
+ switch (node->type) {
+ case nir_cf_node_block:
+ validate_block(nir_cf_node_as_block(node), state);
+ break;
+
+ case nir_cf_node_if:
+ validate_if(nir_cf_node_as_if(node), state);
+ break;
+
+ case nir_cf_node_loop:
+ validate_loop(nir_cf_node_as_loop(node), state);
+ break;
+
+ default:
+ unreachable("Invalid CF node type");
+ }
+ }
+
+ static void
+ prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state)
+ {
+ assert(reg->is_global == is_global);
+
+ if (is_global)
+ assert(reg->index < state->shader->reg_alloc);
+ else
+ assert(reg->index < state->impl->reg_alloc);
+ assert(!BITSET_TEST(state->regs_found, reg->index));
+ BITSET_SET(state->regs_found, reg->index);
+
+ list_validate(&reg->uses);
+ list_validate(&reg->defs);
+ list_validate(&reg->if_uses);
+
+ reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state);
+ reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ reg_state->where_defined = is_global ? NULL : state->impl;
+
+ _mesa_hash_table_insert(state->regs, reg, reg_state);
+ }
+
+ static void
+ postvalidate_reg_decl(nir_register *reg, validate_state *state)
+ {
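+ /* Instruction and if-condition validation recorded every src and dest that
+  * referenced this register in the uses/if_uses/defs sets.  Cross-check the
+  * register's own use/def lists against them: each list entry must have been
+  * seen, and any set entry left over is a reference missing from the lists.
+  */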
+ struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg);
+
+ reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+ nir_foreach_use(reg, src) {
+ struct set_entry *entry = _mesa_set_search(reg_state->uses, src);
+ assert(entry);
+ _mesa_set_remove(reg_state->uses, entry);
+ }
+
+ if (reg_state->uses->entries != 0) {
+ printf("extra entries in register uses:\n");
+ struct set_entry *entry;
+ set_foreach(reg_state->uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ nir_foreach_if_use(reg, src) {
+ struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src);
+ assert(entry);
+ _mesa_set_remove(reg_state->if_uses, entry);
+ }
+
+ if (reg_state->if_uses->entries != 0) {
+ printf("extra entries in register if_uses:\n");
+ struct set_entry *entry;
+ set_foreach(reg_state->if_uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ nir_foreach_def(reg, src) {
+ struct set_entry *entry = _mesa_set_search(reg_state->defs, src);
+ assert(entry);
+ _mesa_set_remove(reg_state->defs, entry);
+ }
+
+ if (reg_state->defs->entries != 0) {
+ printf("extra entries in register defs:\n");
+ struct set_entry *entry;
+ set_foreach(reg_state->defs, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+ }
+
+ static void
+ validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
+ {
+ assert(is_global != (var->data.mode == nir_var_local));
+
+ /*
+ * TODO validate some things ir_validate.cpp does (requires more GLSL type
+ * support)
+ */
+
+ if (!is_global) {
+ _mesa_hash_table_insert(state->var_defs, var, state->impl);
+ }
+ }
+
+ static bool
+ postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+ {
+ validate_state *state = void_state;
+
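+ /* Same cross-check as for registers above, applied to the SSA def's
+  * use and if_use lists.
+  */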
+ struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+ ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+ nir_foreach_use(def, src) {
+ struct set_entry *entry = _mesa_set_search(def_state->uses, src);
+ assert(entry);
+ _mesa_set_remove(def_state->uses, entry);
+ }
+
+ if (def_state->uses->entries != 0) {
+ printf("extra entries in SSA def uses:\n");
+ struct set_entry *entry;
+ set_foreach(def_state->uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ nir_foreach_if_use(def, src) {
+ struct set_entry *entry = _mesa_set_search(def_state->if_uses, src);
+ assert(entry);
+ _mesa_set_remove(def_state->if_uses, entry);
+ }
+
+ if (def_state->if_uses->entries != 0) {
+ printf("extra entries in SSA def if_uses:\n");
+ struct set_entry *entry;
+ set_foreach(def_state->if_uses, entry)
+ printf("%p\n", entry->key);
+
+ abort();
+ }
+
+ return true;
+ }
+
+ static bool
+ postvalidate_ssa_defs_block(nir_block *block, void *state)
+ {
+ nir_foreach_instr(block, instr)
+ nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+
+ return true;
+ }
+
+ static void
+ validate_function_impl(nir_function_impl *impl, validate_state *state)
+ {
+ assert(impl->function->impl == impl);
+ assert(impl->cf_node.parent == NULL);
+
+ assert(impl->num_params == impl->function->num_params);
+ for (unsigned i = 0; i < impl->num_params; i++)
+ assert(impl->params[i]->type == impl->function->params[i].type);
+
+ if (glsl_type_is_void(impl->function->return_type))
+ assert(impl->return_var == NULL);
+ else
+ assert(impl->return_var->type == impl->function->return_type);
+
+ assert(exec_list_is_empty(&impl->end_block->instr_list));
+ assert(impl->end_block->successors[0] == NULL);
+ assert(impl->end_block->successors[1] == NULL);
+
+ state->impl = impl;
+ state->parent_node = &impl->cf_node;
+
+ exec_list_validate(&impl->locals);
+ nir_foreach_variable(var, &impl->locals) {
+ validate_var_decl(var, false, state);
+ }
+
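+ /* Reset the per-impl bitset used to catch duplicate or out-of-range
+  * register indices before walking the declarations.
+  */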
+ state->regs_found = realloc(state->regs_found,
+ BITSET_WORDS(impl->reg_alloc) *
+ sizeof(BITSET_WORD));
+ memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) *
+ sizeof(BITSET_WORD));
+ exec_list_validate(&impl->registers);
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ prevalidate_reg_decl(reg, false, state);
+ }
+
+ state->ssa_defs_found = realloc(state->ssa_defs_found,
+ BITSET_WORDS(impl->ssa_alloc) *
+ sizeof(BITSET_WORD));
+ memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) *
+ sizeof(BITSET_WORD));
+ exec_list_validate(&impl->body);
+ foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+ validate_cf_node(node, state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ postvalidate_reg_decl(reg, state);
+ }
+
+ nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
+ }
+
+ static void
+ validate_function(nir_function *func, validate_state *state)
+ {
+ if (func->impl != NULL) {
+ assert(func->impl->function == func);
+ validate_function_impl(func->impl, state);
+ }
+ }
+
+ static void
+ init_validate_state(validate_state *state)
+ {
+ state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->ssa_defs_found = NULL;
+ state->regs_found = NULL;
+ state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->loop = NULL;
+ }
+
+ static void
+ destroy_validate_state(validate_state *state)
+ {
+ _mesa_hash_table_destroy(state->regs, NULL);
+ _mesa_hash_table_destroy(state->ssa_defs, NULL);
+ free(state->ssa_defs_found);
+ free(state->regs_found);
+ _mesa_hash_table_destroy(state->var_defs, NULL);
+ }
+
+ void
+ nir_validate_shader(nir_shader *shader)
+ {
+ validate_state state;
+ init_validate_state(&state);
+
+ state.shader = shader;
+
+ exec_list_validate(&shader->uniforms);
+ nir_foreach_variable(var, &shader->uniforms) {
+ validate_var_decl(var, true, &state);
+ }
+
+ exec_list_validate(&shader->inputs);
+ nir_foreach_variable(var, &shader->inputs) {
+ validate_var_decl(var, true, &state);
+ }
+
+ exec_list_validate(&shader->outputs);
+ nir_foreach_variable(var, &shader->outputs) {
+ validate_var_decl(var, true, &state);
+ }
+
++ exec_list_validate(&shader->shared);
++ nir_foreach_variable(var, &shader->shared) {
++ validate_var_decl(var, true, &state);
++ }
++
+ exec_list_validate(&shader->globals);
+ nir_foreach_variable(var, &shader->globals) {
+ validate_var_decl(var, true, &state);
+ }
+
+ exec_list_validate(&shader->system_values);
+ nir_foreach_variable(var, &shader->system_values) {
+ validate_var_decl(var, true, &state);
+ }
+
+ state.regs_found = realloc(state.regs_found,
+ BITSET_WORDS(shader->reg_alloc) *
+ sizeof(BITSET_WORD));
+ memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) *
+ sizeof(BITSET_WORD));
+ exec_list_validate(&shader->registers);
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ prevalidate_reg_decl(reg, true, &state);
+ }
+
+ exec_list_validate(&shader->functions);
+ foreach_list_typed(nir_function, func, node, &shader->functions) {
+ validate_function(func, &state);
+ }
+
+ foreach_list_typed(nir_register, reg, node, &shader->registers) {
+ postvalidate_reg_decl(reg, &state);
+ }
+
+ destroy_validate_state(&state);
+ }
+
+ #endif /* NDEBUG */
--- /dev/null
--- /dev/null
++/*
++** Copyright (c) 2014-2015 The Khronos Group Inc.
++**
++** Permission is hereby granted, free of charge, to any person obtaining a copy
++** of this software and/or associated documentation files (the "Materials"),
++** to deal in the Materials without restriction, including without limitation
++** the rights to use, copy, modify, merge, publish, distribute, sublicense,
++** and/or sell copies of the Materials, and to permit persons to whom the
++** Materials are furnished to do so, subject to the following conditions:
++**
++** The above copyright notice and this permission notice shall be included in
++** all copies or substantial portions of the Materials.
++**
++** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
++** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
++** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
++**
++** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
++** IN THE MATERIALS.
++*/
++
++#ifndef GLSLstd450_H
++#define GLSLstd450_H
++
++const int GLSLstd450Version = 99;
++const int GLSLstd450Revision = 3;
++
++enum GLSLstd450 {
++ GLSLstd450Bad = 0, // Don't use
++
++ GLSLstd450Round = 1,
++ GLSLstd450RoundEven = 2,
++ GLSLstd450Trunc = 3,
++ GLSLstd450FAbs = 4,
++ GLSLstd450SAbs = 5,
++ GLSLstd450FSign = 6,
++ GLSLstd450SSign = 7,
++ GLSLstd450Floor = 8,
++ GLSLstd450Ceil = 9,
++ GLSLstd450Fract = 10,
++
++ GLSLstd450Radians = 11,
++ GLSLstd450Degrees = 12,
++ GLSLstd450Sin = 13,
++ GLSLstd450Cos = 14,
++ GLSLstd450Tan = 15,
++ GLSLstd450Asin = 16,
++ GLSLstd450Acos = 17,
++ GLSLstd450Atan = 18,
++ GLSLstd450Sinh = 19,
++ GLSLstd450Cosh = 20,
++ GLSLstd450Tanh = 21,
++ GLSLstd450Asinh = 22,
++ GLSLstd450Acosh = 23,
++ GLSLstd450Atanh = 24,
++ GLSLstd450Atan2 = 25,
++
++ GLSLstd450Pow = 26,
++ GLSLstd450Exp = 27,
++ GLSLstd450Log = 28,
++ GLSLstd450Exp2 = 29,
++ GLSLstd450Log2 = 30,
++ GLSLstd450Sqrt = 31,
++ GLSLstd450InverseSqrt = 32,
++
++ GLSLstd450Determinant = 33,
++ GLSLstd450MatrixInverse = 34,
++
++ GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
++ GLSLstd450ModfStruct = 36, // no OpVariable operand
++ GLSLstd450FMin = 37,
++ GLSLstd450UMin = 38,
++ GLSLstd450SMin = 39,
++ GLSLstd450FMax = 40,
++ GLSLstd450UMax = 41,
++ GLSLstd450SMax = 42,
++ GLSLstd450FClamp = 43,
++ GLSLstd450UClamp = 44,
++ GLSLstd450SClamp = 45,
++ GLSLstd450FMix = 46,
++ GLSLstd450IMix = 47,
++ GLSLstd450Step = 48,
++ GLSLstd450SmoothStep = 49,
++
++ GLSLstd450Fma = 50,
++ GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
++ GLSLstd450FrexpStruct = 52, // no OpVariable operand
++ GLSLstd450Ldexp = 53,
++
++ GLSLstd450PackSnorm4x8 = 54,
++ GLSLstd450PackUnorm4x8 = 55,
++ GLSLstd450PackSnorm2x16 = 56,
++ GLSLstd450PackUnorm2x16 = 57,
++ GLSLstd450PackHalf2x16 = 58,
++ GLSLstd450PackDouble2x32 = 59,
++ GLSLstd450UnpackSnorm2x16 = 60,
++ GLSLstd450UnpackUnorm2x16 = 61,
++ GLSLstd450UnpackHalf2x16 = 62,
++ GLSLstd450UnpackSnorm4x8 = 63,
++ GLSLstd450UnpackUnorm4x8 = 64,
++ GLSLstd450UnpackDouble2x32 = 65,
++
++ GLSLstd450Length = 66,
++ GLSLstd450Distance = 67,
++ GLSLstd450Cross = 68,
++ GLSLstd450Normalize = 69,
++ GLSLstd450FaceForward = 70,
++ GLSLstd450Reflect = 71,
++ GLSLstd450Refract = 72,
++
++ GLSLstd450FindILsb = 73,
++ GLSLstd450FindSMsb = 74,
++ GLSLstd450FindUMsb = 75,
++
++ GLSLstd450InterpolateAtCentroid = 76,
++ GLSLstd450InterpolateAtSample = 77,
++ GLSLstd450InterpolateAtOffset = 78,
++
++ GLSLstd450Count
++};
++
++#endif // #ifndef GLSLstd450_H
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#pragma once
++
++#ifndef _NIR_SPIRV_H_
++#define _NIR_SPIRV_H_
++
++#include "nir/nir.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++struct nir_spirv_specialization {
++ uint32_t id;
++ uint32_t data;
++};
++
++nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
++ struct nir_spirv_specialization *specializations,
++ unsigned num_specializations,
++ gl_shader_stage stage, const char *entry_point_name,
++ const nir_shader_compiler_options *options);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* _NIR_SPIRV_H_ */
--- /dev/null
--- /dev/null
++/*
++** Copyright (c) 2014-2015 The Khronos Group Inc.
++**
++** Permission is hereby granted, free of charge, to any person obtaining a copy
++** of this software and/or associated documentation files (the "Materials"),
++** to deal in the Materials without restriction, including without limitation
++** the rights to use, copy, modify, merge, publish, distribute, sublicense,
++** and/or sell copies of the Materials, and to permit persons to whom the
++** Materials are furnished to do so, subject to the following conditions:
++**
++** The above copyright notice and this permission notice shall be included in
++** all copies or substantial portions of the Materials.
++**
++** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
++** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
++** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
++**
++** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
++** IN THE MATERIALS.
++*/
++
++/*
++** This header is automatically generated by the same tool that creates
++** the Binary Section of the SPIR-V specification.
++*/
++
++/*
++** Enumeration tokens for SPIR-V, in various styles:
++** C, C++, C++11, JSON, Lua, Python
++**
++** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
++** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
++** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
++** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
++** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
++**
++** Some tokens act like mask values, which can be OR'd together,
++** while others are mutually exclusive. The mask-like ones have
++** "Mask" in their name, and a parallel enum that has the shift
++** amount (1 << x) for each corresponding enumerant.
++*/
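++
++/*
++** For example (illustrative only): mask bits from the same enum can be
++** OR'd together, and each named *Mask bit equals 1 << the matching *Shift
++** value, e.g.
++**
++**     SpvImageOperandsMask ops = SpvImageOperandsBiasMask |
++**                                SpvImageOperandsLodMask;
++**     // SpvImageOperandsBiasMask == (1u << SpvImageOperandsBiasShift)
++*/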
++
++#ifndef spirv_H
++#define spirv_H
++
++typedef unsigned int SpvId;
++
++#define SPV_VERSION 0x10000
++#define SPV_REVISION 2
++
++static const unsigned int SpvMagicNumber = 0x07230203;
++static const unsigned int SpvVersion = 0x00010000;
++static const unsigned int SpvRevision = 2;
++static const unsigned int SpvOpCodeMask = 0xffff;
++static const unsigned int SpvWordCountShift = 16;
++
++typedef enum SpvSourceLanguage_ {
++ SpvSourceLanguageUnknown = 0,
++ SpvSourceLanguageESSL = 1,
++ SpvSourceLanguageGLSL = 2,
++ SpvSourceLanguageOpenCL_C = 3,
++ SpvSourceLanguageOpenCL_CPP = 4,
++} SpvSourceLanguage;
++
++typedef enum SpvExecutionModel_ {
++ SpvExecutionModelVertex = 0,
++ SpvExecutionModelTessellationControl = 1,
++ SpvExecutionModelTessellationEvaluation = 2,
++ SpvExecutionModelGeometry = 3,
++ SpvExecutionModelFragment = 4,
++ SpvExecutionModelGLCompute = 5,
++ SpvExecutionModelKernel = 6,
++} SpvExecutionModel;
++
++typedef enum SpvAddressingModel_ {
++ SpvAddressingModelLogical = 0,
++ SpvAddressingModelPhysical32 = 1,
++ SpvAddressingModelPhysical64 = 2,
++} SpvAddressingModel;
++
++typedef enum SpvMemoryModel_ {
++ SpvMemoryModelSimple = 0,
++ SpvMemoryModelGLSL450 = 1,
++ SpvMemoryModelOpenCL = 2,
++} SpvMemoryModel;
++
++typedef enum SpvExecutionMode_ {
++ SpvExecutionModeInvocations = 0,
++ SpvExecutionModeSpacingEqual = 1,
++ SpvExecutionModeSpacingFractionalEven = 2,
++ SpvExecutionModeSpacingFractionalOdd = 3,
++ SpvExecutionModeVertexOrderCw = 4,
++ SpvExecutionModeVertexOrderCcw = 5,
++ SpvExecutionModePixelCenterInteger = 6,
++ SpvExecutionModeOriginUpperLeft = 7,
++ SpvExecutionModeOriginLowerLeft = 8,
++ SpvExecutionModeEarlyFragmentTests = 9,
++ SpvExecutionModePointMode = 10,
++ SpvExecutionModeXfb = 11,
++ SpvExecutionModeDepthReplacing = 12,
++ SpvExecutionModeDepthGreater = 14,
++ SpvExecutionModeDepthLess = 15,
++ SpvExecutionModeDepthUnchanged = 16,
++ SpvExecutionModeLocalSize = 17,
++ SpvExecutionModeLocalSizeHint = 18,
++ SpvExecutionModeInputPoints = 19,
++ SpvExecutionModeInputLines = 20,
++ SpvExecutionModeInputLinesAdjacency = 21,
++ SpvExecutionModeTriangles = 22,
++ SpvExecutionModeInputTrianglesAdjacency = 23,
++ SpvExecutionModeQuads = 24,
++ SpvExecutionModeIsolines = 25,
++ SpvExecutionModeOutputVertices = 26,
++ SpvExecutionModeOutputPoints = 27,
++ SpvExecutionModeOutputLineStrip = 28,
++ SpvExecutionModeOutputTriangleStrip = 29,
++ SpvExecutionModeVecTypeHint = 30,
++ SpvExecutionModeContractionOff = 31,
++} SpvExecutionMode;
++
++typedef enum SpvStorageClass_ {
++ SpvStorageClassUniformConstant = 0,
++ SpvStorageClassInput = 1,
++ SpvStorageClassUniform = 2,
++ SpvStorageClassOutput = 3,
++ SpvStorageClassWorkgroup = 4,
++ SpvStorageClassCrossWorkgroup = 5,
++ SpvStorageClassPrivate = 6,
++ SpvStorageClassFunction = 7,
++ SpvStorageClassGeneric = 8,
++ SpvStorageClassPushConstant = 9,
++ SpvStorageClassAtomicCounter = 10,
++ SpvStorageClassImage = 11,
++} SpvStorageClass;
++
++typedef enum SpvDim_ {
++ SpvDim1D = 0,
++ SpvDim2D = 1,
++ SpvDim3D = 2,
++ SpvDimCube = 3,
++ SpvDimRect = 4,
++ SpvDimBuffer = 5,
++ SpvDimSubpassData = 6,
++} SpvDim;
++
++typedef enum SpvSamplerAddressingMode_ {
++ SpvSamplerAddressingModeNone = 0,
++ SpvSamplerAddressingModeClampToEdge = 1,
++ SpvSamplerAddressingModeClamp = 2,
++ SpvSamplerAddressingModeRepeat = 3,
++ SpvSamplerAddressingModeRepeatMirrored = 4,
++} SpvSamplerAddressingMode;
++
++typedef enum SpvSamplerFilterMode_ {
++ SpvSamplerFilterModeNearest = 0,
++ SpvSamplerFilterModeLinear = 1,
++} SpvSamplerFilterMode;
++
++typedef enum SpvImageFormat_ {
++ SpvImageFormatUnknown = 0,
++ SpvImageFormatRgba32f = 1,
++ SpvImageFormatRgba16f = 2,
++ SpvImageFormatR32f = 3,
++ SpvImageFormatRgba8 = 4,
++ SpvImageFormatRgba8Snorm = 5,
++ SpvImageFormatRg32f = 6,
++ SpvImageFormatRg16f = 7,
++ SpvImageFormatR11fG11fB10f = 8,
++ SpvImageFormatR16f = 9,
++ SpvImageFormatRgba16 = 10,
++ SpvImageFormatRgb10A2 = 11,
++ SpvImageFormatRg16 = 12,
++ SpvImageFormatRg8 = 13,
++ SpvImageFormatR16 = 14,
++ SpvImageFormatR8 = 15,
++ SpvImageFormatRgba16Snorm = 16,
++ SpvImageFormatRg16Snorm = 17,
++ SpvImageFormatRg8Snorm = 18,
++ SpvImageFormatR16Snorm = 19,
++ SpvImageFormatR8Snorm = 20,
++ SpvImageFormatRgba32i = 21,
++ SpvImageFormatRgba16i = 22,
++ SpvImageFormatRgba8i = 23,
++ SpvImageFormatR32i = 24,
++ SpvImageFormatRg32i = 25,
++ SpvImageFormatRg16i = 26,
++ SpvImageFormatRg8i = 27,
++ SpvImageFormatR16i = 28,
++ SpvImageFormatR8i = 29,
++ SpvImageFormatRgba32ui = 30,
++ SpvImageFormatRgba16ui = 31,
++ SpvImageFormatRgba8ui = 32,
++ SpvImageFormatR32ui = 33,
++ SpvImageFormatRgb10a2ui = 34,
++ SpvImageFormatRg32ui = 35,
++ SpvImageFormatRg16ui = 36,
++ SpvImageFormatRg8ui = 37,
++ SpvImageFormatR16ui = 38,
++ SpvImageFormatR8ui = 39,
++} SpvImageFormat;
++
++typedef enum SpvImageChannelOrder_ {
++ SpvImageChannelOrderR = 0,
++ SpvImageChannelOrderA = 1,
++ SpvImageChannelOrderRG = 2,
++ SpvImageChannelOrderRA = 3,
++ SpvImageChannelOrderRGB = 4,
++ SpvImageChannelOrderRGBA = 5,
++ SpvImageChannelOrderBGRA = 6,
++ SpvImageChannelOrderARGB = 7,
++ SpvImageChannelOrderIntensity = 8,
++ SpvImageChannelOrderLuminance = 9,
++ SpvImageChannelOrderRx = 10,
++ SpvImageChannelOrderRGx = 11,
++ SpvImageChannelOrderRGBx = 12,
++ SpvImageChannelOrderDepth = 13,
++ SpvImageChannelOrderDepthStencil = 14,
++ SpvImageChannelOrdersRGB = 15,
++ SpvImageChannelOrdersRGBx = 16,
++ SpvImageChannelOrdersRGBA = 17,
++ SpvImageChannelOrdersBGRA = 18,
++} SpvImageChannelOrder;
++
++typedef enum SpvImageChannelDataType_ {
++ SpvImageChannelDataTypeSnormInt8 = 0,
++ SpvImageChannelDataTypeSnormInt16 = 1,
++ SpvImageChannelDataTypeUnormInt8 = 2,
++ SpvImageChannelDataTypeUnormInt16 = 3,
++ SpvImageChannelDataTypeUnormShort565 = 4,
++ SpvImageChannelDataTypeUnormShort555 = 5,
++ SpvImageChannelDataTypeUnormInt101010 = 6,
++ SpvImageChannelDataTypeSignedInt8 = 7,
++ SpvImageChannelDataTypeSignedInt16 = 8,
++ SpvImageChannelDataTypeSignedInt32 = 9,
++ SpvImageChannelDataTypeUnsignedInt8 = 10,
++ SpvImageChannelDataTypeUnsignedInt16 = 11,
++ SpvImageChannelDataTypeUnsignedInt32 = 12,
++ SpvImageChannelDataTypeHalfFloat = 13,
++ SpvImageChannelDataTypeFloat = 14,
++ SpvImageChannelDataTypeUnormInt24 = 15,
++ SpvImageChannelDataTypeUnormInt101010_2 = 16,
++} SpvImageChannelDataType;
++
++typedef enum SpvImageOperandsShift_ {
++ SpvImageOperandsBiasShift = 0,
++ SpvImageOperandsLodShift = 1,
++ SpvImageOperandsGradShift = 2,
++ SpvImageOperandsConstOffsetShift = 3,
++ SpvImageOperandsOffsetShift = 4,
++ SpvImageOperandsConstOffsetsShift = 5,
++ SpvImageOperandsSampleShift = 6,
++ SpvImageOperandsMinLodShift = 7,
++} SpvImageOperandsShift;
++
++typedef enum SpvImageOperandsMask_ {
++ SpvImageOperandsMaskNone = 0,
++ SpvImageOperandsBiasMask = 0x00000001,
++ SpvImageOperandsLodMask = 0x00000002,
++ SpvImageOperandsGradMask = 0x00000004,
++ SpvImageOperandsConstOffsetMask = 0x00000008,
++ SpvImageOperandsOffsetMask = 0x00000010,
++ SpvImageOperandsConstOffsetsMask = 0x00000020,
++ SpvImageOperandsSampleMask = 0x00000040,
++ SpvImageOperandsMinLodMask = 0x00000080,
++} SpvImageOperandsMask;
++
++typedef enum SpvFPFastMathModeShift_ {
++ SpvFPFastMathModeNotNaNShift = 0,
++ SpvFPFastMathModeNotInfShift = 1,
++ SpvFPFastMathModeNSZShift = 2,
++ SpvFPFastMathModeAllowRecipShift = 3,
++ SpvFPFastMathModeFastShift = 4,
++} SpvFPFastMathModeShift;
++
++typedef enum SpvFPFastMathModeMask_ {
++ SpvFPFastMathModeMaskNone = 0,
++ SpvFPFastMathModeNotNaNMask = 0x00000001,
++ SpvFPFastMathModeNotInfMask = 0x00000002,
++ SpvFPFastMathModeNSZMask = 0x00000004,
++ SpvFPFastMathModeAllowRecipMask = 0x00000008,
++ SpvFPFastMathModeFastMask = 0x00000010,
++} SpvFPFastMathModeMask;
++
++typedef enum SpvFPRoundingMode_ {
++ SpvFPRoundingModeRTE = 0,
++ SpvFPRoundingModeRTZ = 1,
++ SpvFPRoundingModeRTP = 2,
++ SpvFPRoundingModeRTN = 3,
++} SpvFPRoundingMode;
++
++typedef enum SpvLinkageType_ {
++ SpvLinkageTypeExport = 0,
++ SpvLinkageTypeImport = 1,
++} SpvLinkageType;
++
++typedef enum SpvAccessQualifier_ {
++ SpvAccessQualifierReadOnly = 0,
++ SpvAccessQualifierWriteOnly = 1,
++ SpvAccessQualifierReadWrite = 2,
++} SpvAccessQualifier;
++
++typedef enum SpvFunctionParameterAttribute_ {
++ SpvFunctionParameterAttributeZext = 0,
++ SpvFunctionParameterAttributeSext = 1,
++ SpvFunctionParameterAttributeByVal = 2,
++ SpvFunctionParameterAttributeSret = 3,
++ SpvFunctionParameterAttributeNoAlias = 4,
++ SpvFunctionParameterAttributeNoCapture = 5,
++ SpvFunctionParameterAttributeNoWrite = 6,
++ SpvFunctionParameterAttributeNoReadWrite = 7,
++} SpvFunctionParameterAttribute;
++
++typedef enum SpvDecoration_ {
++ SpvDecorationRelaxedPrecision = 0,
++ SpvDecorationSpecId = 1,
++ SpvDecorationBlock = 2,
++ SpvDecorationBufferBlock = 3,
++ SpvDecorationRowMajor = 4,
++ SpvDecorationColMajor = 5,
++ SpvDecorationArrayStride = 6,
++ SpvDecorationMatrixStride = 7,
++ SpvDecorationGLSLShared = 8,
++ SpvDecorationGLSLPacked = 9,
++ SpvDecorationCPacked = 10,
++ SpvDecorationBuiltIn = 11,
++ SpvDecorationNoPerspective = 13,
++ SpvDecorationFlat = 14,
++ SpvDecorationPatch = 15,
++ SpvDecorationCentroid = 16,
++ SpvDecorationSample = 17,
++ SpvDecorationInvariant = 18,
++ SpvDecorationRestrict = 19,
++ SpvDecorationAliased = 20,
++ SpvDecorationVolatile = 21,
++ SpvDecorationConstant = 22,
++ SpvDecorationCoherent = 23,
++ SpvDecorationNonWritable = 24,
++ SpvDecorationNonReadable = 25,
++ SpvDecorationUniform = 26,
++ SpvDecorationSaturatedConversion = 28,
++ SpvDecorationStream = 29,
++ SpvDecorationLocation = 30,
++ SpvDecorationComponent = 31,
++ SpvDecorationIndex = 32,
++ SpvDecorationBinding = 33,
++ SpvDecorationDescriptorSet = 34,
++ SpvDecorationOffset = 35,
++ SpvDecorationXfbBuffer = 36,
++ SpvDecorationXfbStride = 37,
++ SpvDecorationFuncParamAttr = 38,
++ SpvDecorationFPRoundingMode = 39,
++ SpvDecorationFPFastMathMode = 40,
++ SpvDecorationLinkageAttributes = 41,
++ SpvDecorationNoContraction = 42,
++ SpvDecorationInputAttachmentIndex = 43,
++ SpvDecorationAlignment = 44,
++} SpvDecoration;
++
++typedef enum SpvBuiltIn_ {
++ SpvBuiltInPosition = 0,
++ SpvBuiltInPointSize = 1,
++ SpvBuiltInClipDistance = 3,
++ SpvBuiltInCullDistance = 4,
++ SpvBuiltInVertexId = 5,
++ SpvBuiltInInstanceId = 6,
++ SpvBuiltInPrimitiveId = 7,
++ SpvBuiltInInvocationId = 8,
++ SpvBuiltInLayer = 9,
++ SpvBuiltInViewportIndex = 10,
++ SpvBuiltInTessLevelOuter = 11,
++ SpvBuiltInTessLevelInner = 12,
++ SpvBuiltInTessCoord = 13,
++ SpvBuiltInPatchVertices = 14,
++ SpvBuiltInFragCoord = 15,
++ SpvBuiltInPointCoord = 16,
++ SpvBuiltInFrontFacing = 17,
++ SpvBuiltInSampleId = 18,
++ SpvBuiltInSamplePosition = 19,
++ SpvBuiltInSampleMask = 20,
++ SpvBuiltInFragDepth = 22,
++ SpvBuiltInHelperInvocation = 23,
++ SpvBuiltInNumWorkgroups = 24,
++ SpvBuiltInWorkgroupSize = 25,
++ SpvBuiltInWorkgroupId = 26,
++ SpvBuiltInLocalInvocationId = 27,
++ SpvBuiltInGlobalInvocationId = 28,
++ SpvBuiltInLocalInvocationIndex = 29,
++ SpvBuiltInWorkDim = 30,
++ SpvBuiltInGlobalSize = 31,
++ SpvBuiltInEnqueuedWorkgroupSize = 32,
++ SpvBuiltInGlobalOffset = 33,
++ SpvBuiltInGlobalLinearId = 34,
++ SpvBuiltInSubgroupSize = 36,
++ SpvBuiltInSubgroupMaxSize = 37,
++ SpvBuiltInNumSubgroups = 38,
++ SpvBuiltInNumEnqueuedSubgroups = 39,
++ SpvBuiltInSubgroupId = 40,
++ SpvBuiltInSubgroupLocalInvocationId = 41,
++ SpvBuiltInVertexIndex = 42,
++ SpvBuiltInInstanceIndex = 43,
++} SpvBuiltIn;
++
++typedef enum SpvSelectionControlShift_ {
++ SpvSelectionControlFlattenShift = 0,
++ SpvSelectionControlDontFlattenShift = 1,
++} SpvSelectionControlShift;
++
++typedef enum SpvSelectionControlMask_ {
++ SpvSelectionControlMaskNone = 0,
++ SpvSelectionControlFlattenMask = 0x00000001,
++ SpvSelectionControlDontFlattenMask = 0x00000002,
++} SpvSelectionControlMask;
++
++typedef enum SpvLoopControlShift_ {
++ SpvLoopControlUnrollShift = 0,
++ SpvLoopControlDontUnrollShift = 1,
++} SpvLoopControlShift;
++
++typedef enum SpvLoopControlMask_ {
++ SpvLoopControlMaskNone = 0,
++ SpvLoopControlUnrollMask = 0x00000001,
++ SpvLoopControlDontUnrollMask = 0x00000002,
++} SpvLoopControlMask;
++
++typedef enum SpvFunctionControlShift_ {
++ SpvFunctionControlInlineShift = 0,
++ SpvFunctionControlDontInlineShift = 1,
++ SpvFunctionControlPureShift = 2,
++ SpvFunctionControlConstShift = 3,
++} SpvFunctionControlShift;
++
++typedef enum SpvFunctionControlMask_ {
++ SpvFunctionControlMaskNone = 0,
++ SpvFunctionControlInlineMask = 0x00000001,
++ SpvFunctionControlDontInlineMask = 0x00000002,
++ SpvFunctionControlPureMask = 0x00000004,
++ SpvFunctionControlConstMask = 0x00000008,
++} SpvFunctionControlMask;
++
++typedef enum SpvMemorySemanticsShift_ {
++ SpvMemorySemanticsAcquireShift = 1,
++ SpvMemorySemanticsReleaseShift = 2,
++ SpvMemorySemanticsAcquireReleaseShift = 3,
++ SpvMemorySemanticsSequentiallyConsistentShift = 4,
++ SpvMemorySemanticsUniformMemoryShift = 6,
++ SpvMemorySemanticsSubgroupMemoryShift = 7,
++ SpvMemorySemanticsWorkgroupMemoryShift = 8,
++ SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
++ SpvMemorySemanticsAtomicCounterMemoryShift = 10,
++ SpvMemorySemanticsImageMemoryShift = 11,
++} SpvMemorySemanticsShift;
++
++typedef enum SpvMemorySemanticsMask_ {
++ SpvMemorySemanticsMaskNone = 0,
++ SpvMemorySemanticsAcquireMask = 0x00000002,
++ SpvMemorySemanticsReleaseMask = 0x00000004,
++ SpvMemorySemanticsAcquireReleaseMask = 0x00000008,
++ SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010,
++ SpvMemorySemanticsUniformMemoryMask = 0x00000040,
++ SpvMemorySemanticsSubgroupMemoryMask = 0x00000080,
++ SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100,
++ SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
++ SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
++ SpvMemorySemanticsImageMemoryMask = 0x00000800,
++} SpvMemorySemanticsMask;
++
++typedef enum SpvMemoryAccessShift_ {
++ SpvMemoryAccessVolatileShift = 0,
++ SpvMemoryAccessAlignedShift = 1,
++ SpvMemoryAccessNontemporalShift = 2,
++} SpvMemoryAccessShift;
++
++typedef enum SpvMemoryAccessMask_ {
++ SpvMemoryAccessMaskNone = 0,
++ SpvMemoryAccessVolatileMask = 0x00000001,
++ SpvMemoryAccessAlignedMask = 0x00000002,
++ SpvMemoryAccessNontemporalMask = 0x00000004,
++} SpvMemoryAccessMask;
++
++typedef enum SpvScope_ {
++ SpvScopeCrossDevice = 0,
++ SpvScopeDevice = 1,
++ SpvScopeWorkgroup = 2,
++ SpvScopeSubgroup = 3,
++ SpvScopeInvocation = 4,
++} SpvScope;
++
++typedef enum SpvGroupOperation_ {
++ SpvGroupOperationReduce = 0,
++ SpvGroupOperationInclusiveScan = 1,
++ SpvGroupOperationExclusiveScan = 2,
++} SpvGroupOperation;
++
++typedef enum SpvKernelEnqueueFlags_ {
++ SpvKernelEnqueueFlagsNoWait = 0,
++ SpvKernelEnqueueFlagsWaitKernel = 1,
++ SpvKernelEnqueueFlagsWaitWorkGroup = 2,
++} SpvKernelEnqueueFlags;
++
++typedef enum SpvKernelProfilingInfoShift_ {
++ SpvKernelProfilingInfoCmdExecTimeShift = 0,
++} SpvKernelProfilingInfoShift;
++
++typedef enum SpvKernelProfilingInfoMask_ {
++ SpvKernelProfilingInfoMaskNone = 0,
++ SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001,
++} SpvKernelProfilingInfoMask;
++
++typedef enum SpvCapability_ {
++ SpvCapabilityMatrix = 0,
++ SpvCapabilityShader = 1,
++ SpvCapabilityGeometry = 2,
++ SpvCapabilityTessellation = 3,
++ SpvCapabilityAddresses = 4,
++ SpvCapabilityLinkage = 5,
++ SpvCapabilityKernel = 6,
++ SpvCapabilityVector16 = 7,
++ SpvCapabilityFloat16Buffer = 8,
++ SpvCapabilityFloat16 = 9,
++ SpvCapabilityFloat64 = 10,
++ SpvCapabilityInt64 = 11,
++ SpvCapabilityInt64Atomics = 12,
++ SpvCapabilityImageBasic = 13,
++ SpvCapabilityImageReadWrite = 14,
++ SpvCapabilityImageMipmap = 15,
++ SpvCapabilityPipes = 17,
++ SpvCapabilityGroups = 18,
++ SpvCapabilityDeviceEnqueue = 19,
++ SpvCapabilityLiteralSampler = 20,
++ SpvCapabilityAtomicStorage = 21,
++ SpvCapabilityInt16 = 22,
++ SpvCapabilityTessellationPointSize = 23,
++ SpvCapabilityGeometryPointSize = 24,
++ SpvCapabilityImageGatherExtended = 25,
++ SpvCapabilityStorageImageMultisample = 27,
++ SpvCapabilityUniformBufferArrayDynamicIndexing = 28,
++ SpvCapabilitySampledImageArrayDynamicIndexing = 29,
++ SpvCapabilityStorageBufferArrayDynamicIndexing = 30,
++ SpvCapabilityStorageImageArrayDynamicIndexing = 31,
++ SpvCapabilityClipDistance = 32,
++ SpvCapabilityCullDistance = 33,
++ SpvCapabilityImageCubeArray = 34,
++ SpvCapabilitySampleRateShading = 35,
++ SpvCapabilityImageRect = 36,
++ SpvCapabilitySampledRect = 37,
++ SpvCapabilityGenericPointer = 38,
++ SpvCapabilityInt8 = 39,
++ SpvCapabilityInputAttachment = 40,
++ SpvCapabilitySparseResidency = 41,
++ SpvCapabilityMinLod = 42,
++ SpvCapabilitySampled1D = 43,
++ SpvCapabilityImage1D = 44,
++ SpvCapabilitySampledCubeArray = 45,
++ SpvCapabilitySampledBuffer = 46,
++ SpvCapabilityImageBuffer = 47,
++ SpvCapabilityImageMSArray = 48,
++ SpvCapabilityStorageImageExtendedFormats = 49,
++ SpvCapabilityImageQuery = 50,
++ SpvCapabilityDerivativeControl = 51,
++ SpvCapabilityInterpolationFunction = 52,
++ SpvCapabilityTransformFeedback = 53,
++ SpvCapabilityGeometryStreams = 54,
++ SpvCapabilityStorageImageReadWithoutFormat = 55,
++ SpvCapabilityStorageImageWriteWithoutFormat = 56,
++ SpvCapabilityMultiViewport = 57,
++} SpvCapability;
++
++typedef enum SpvOp_ {
++ SpvOpNop = 0,
++ SpvOpUndef = 1,
++ SpvOpSourceContinued = 2,
++ SpvOpSource = 3,
++ SpvOpSourceExtension = 4,
++ SpvOpName = 5,
++ SpvOpMemberName = 6,
++ SpvOpString = 7,
++ SpvOpLine = 8,
++ SpvOpExtension = 10,
++ SpvOpExtInstImport = 11,
++ SpvOpExtInst = 12,
++ SpvOpMemoryModel = 14,
++ SpvOpEntryPoint = 15,
++ SpvOpExecutionMode = 16,
++ SpvOpCapability = 17,
++ SpvOpTypeVoid = 19,
++ SpvOpTypeBool = 20,
++ SpvOpTypeInt = 21,
++ SpvOpTypeFloat = 22,
++ SpvOpTypeVector = 23,
++ SpvOpTypeMatrix = 24,
++ SpvOpTypeImage = 25,
++ SpvOpTypeSampler = 26,
++ SpvOpTypeSampledImage = 27,
++ SpvOpTypeArray = 28,
++ SpvOpTypeRuntimeArray = 29,
++ SpvOpTypeStruct = 30,
++ SpvOpTypeOpaque = 31,
++ SpvOpTypePointer = 32,
++ SpvOpTypeFunction = 33,
++ SpvOpTypeEvent = 34,
++ SpvOpTypeDeviceEvent = 35,
++ SpvOpTypeReserveId = 36,
++ SpvOpTypeQueue = 37,
++ SpvOpTypePipe = 38,
++ SpvOpTypeForwardPointer = 39,
++ SpvOpConstantTrue = 41,
++ SpvOpConstantFalse = 42,
++ SpvOpConstant = 43,
++ SpvOpConstantComposite = 44,
++ SpvOpConstantSampler = 45,
++ SpvOpConstantNull = 46,
++ SpvOpSpecConstantTrue = 48,
++ SpvOpSpecConstantFalse = 49,
++ SpvOpSpecConstant = 50,
++ SpvOpSpecConstantComposite = 51,
++ SpvOpSpecConstantOp = 52,
++ SpvOpFunction = 54,
++ SpvOpFunctionParameter = 55,
++ SpvOpFunctionEnd = 56,
++ SpvOpFunctionCall = 57,
++ SpvOpVariable = 59,
++ SpvOpImageTexelPointer = 60,
++ SpvOpLoad = 61,
++ SpvOpStore = 62,
++ SpvOpCopyMemory = 63,
++ SpvOpCopyMemorySized = 64,
++ SpvOpAccessChain = 65,
++ SpvOpInBoundsAccessChain = 66,
++ SpvOpPtrAccessChain = 67,
++ SpvOpArrayLength = 68,
++ SpvOpGenericPtrMemSemantics = 69,
++ SpvOpInBoundsPtrAccessChain = 70,
++ SpvOpDecorate = 71,
++ SpvOpMemberDecorate = 72,
++ SpvOpDecorationGroup = 73,
++ SpvOpGroupDecorate = 74,
++ SpvOpGroupMemberDecorate = 75,
++ SpvOpVectorExtractDynamic = 77,
++ SpvOpVectorInsertDynamic = 78,
++ SpvOpVectorShuffle = 79,
++ SpvOpCompositeConstruct = 80,
++ SpvOpCompositeExtract = 81,
++ SpvOpCompositeInsert = 82,
++ SpvOpCopyObject = 83,
++ SpvOpTranspose = 84,
++ SpvOpSampledImage = 86,
++ SpvOpImageSampleImplicitLod = 87,
++ SpvOpImageSampleExplicitLod = 88,
++ SpvOpImageSampleDrefImplicitLod = 89,
++ SpvOpImageSampleDrefExplicitLod = 90,
++ SpvOpImageSampleProjImplicitLod = 91,
++ SpvOpImageSampleProjExplicitLod = 92,
++ SpvOpImageSampleProjDrefImplicitLod = 93,
++ SpvOpImageSampleProjDrefExplicitLod = 94,
++ SpvOpImageFetch = 95,
++ SpvOpImageGather = 96,
++ SpvOpImageDrefGather = 97,
++ SpvOpImageRead = 98,
++ SpvOpImageWrite = 99,
++ SpvOpImage = 100,
++ SpvOpImageQueryFormat = 101,
++ SpvOpImageQueryOrder = 102,
++ SpvOpImageQuerySizeLod = 103,
++ SpvOpImageQuerySize = 104,
++ SpvOpImageQueryLod = 105,
++ SpvOpImageQueryLevels = 106,
++ SpvOpImageQuerySamples = 107,
++ SpvOpConvertFToU = 109,
++ SpvOpConvertFToS = 110,
++ SpvOpConvertSToF = 111,
++ SpvOpConvertUToF = 112,
++ SpvOpUConvert = 113,
++ SpvOpSConvert = 114,
++ SpvOpFConvert = 115,
++ SpvOpQuantizeToF16 = 116,
++ SpvOpConvertPtrToU = 117,
++ SpvOpSatConvertSToU = 118,
++ SpvOpSatConvertUToS = 119,
++ SpvOpConvertUToPtr = 120,
++ SpvOpPtrCastToGeneric = 121,
++ SpvOpGenericCastToPtr = 122,
++ SpvOpGenericCastToPtrExplicit = 123,
++ SpvOpBitcast = 124,
++ SpvOpSNegate = 126,
++ SpvOpFNegate = 127,
++ SpvOpIAdd = 128,
++ SpvOpFAdd = 129,
++ SpvOpISub = 130,
++ SpvOpFSub = 131,
++ SpvOpIMul = 132,
++ SpvOpFMul = 133,
++ SpvOpUDiv = 134,
++ SpvOpSDiv = 135,
++ SpvOpFDiv = 136,
++ SpvOpUMod = 137,
++ SpvOpSRem = 138,
++ SpvOpSMod = 139,
++ SpvOpFRem = 140,
++ SpvOpFMod = 141,
++ SpvOpVectorTimesScalar = 142,
++ SpvOpMatrixTimesScalar = 143,
++ SpvOpVectorTimesMatrix = 144,
++ SpvOpMatrixTimesVector = 145,
++ SpvOpMatrixTimesMatrix = 146,
++ SpvOpOuterProduct = 147,
++ SpvOpDot = 148,
++ SpvOpIAddCarry = 149,
++ SpvOpISubBorrow = 150,
++ SpvOpUMulExtended = 151,
++ SpvOpSMulExtended = 152,
++ SpvOpAny = 154,
++ SpvOpAll = 155,
++ SpvOpIsNan = 156,
++ SpvOpIsInf = 157,
++ SpvOpIsFinite = 158,
++ SpvOpIsNormal = 159,
++ SpvOpSignBitSet = 160,
++ SpvOpLessOrGreater = 161,
++ SpvOpOrdered = 162,
++ SpvOpUnordered = 163,
++ SpvOpLogicalEqual = 164,
++ SpvOpLogicalNotEqual = 165,
++ SpvOpLogicalOr = 166,
++ SpvOpLogicalAnd = 167,
++ SpvOpLogicalNot = 168,
++ SpvOpSelect = 169,
++ SpvOpIEqual = 170,
++ SpvOpINotEqual = 171,
++ SpvOpUGreaterThan = 172,
++ SpvOpSGreaterThan = 173,
++ SpvOpUGreaterThanEqual = 174,
++ SpvOpSGreaterThanEqual = 175,
++ SpvOpULessThan = 176,
++ SpvOpSLessThan = 177,
++ SpvOpULessThanEqual = 178,
++ SpvOpSLessThanEqual = 179,
++ SpvOpFOrdEqual = 180,
++ SpvOpFUnordEqual = 181,
++ SpvOpFOrdNotEqual = 182,
++ SpvOpFUnordNotEqual = 183,
++ SpvOpFOrdLessThan = 184,
++ SpvOpFUnordLessThan = 185,
++ SpvOpFOrdGreaterThan = 186,
++ SpvOpFUnordGreaterThan = 187,
++ SpvOpFOrdLessThanEqual = 188,
++ SpvOpFUnordLessThanEqual = 189,
++ SpvOpFOrdGreaterThanEqual = 190,
++ SpvOpFUnordGreaterThanEqual = 191,
++ SpvOpShiftRightLogical = 194,
++ SpvOpShiftRightArithmetic = 195,
++ SpvOpShiftLeftLogical = 196,
++ SpvOpBitwiseOr = 197,
++ SpvOpBitwiseXor = 198,
++ SpvOpBitwiseAnd = 199,
++ SpvOpNot = 200,
++ SpvOpBitFieldInsert = 201,
++ SpvOpBitFieldSExtract = 202,
++ SpvOpBitFieldUExtract = 203,
++ SpvOpBitReverse = 204,
++ SpvOpBitCount = 205,
++ SpvOpDPdx = 207,
++ SpvOpDPdy = 208,
++ SpvOpFwidth = 209,
++ SpvOpDPdxFine = 210,
++ SpvOpDPdyFine = 211,
++ SpvOpFwidthFine = 212,
++ SpvOpDPdxCoarse = 213,
++ SpvOpDPdyCoarse = 214,
++ SpvOpFwidthCoarse = 215,
++ SpvOpEmitVertex = 218,
++ SpvOpEndPrimitive = 219,
++ SpvOpEmitStreamVertex = 220,
++ SpvOpEndStreamPrimitive = 221,
++ SpvOpControlBarrier = 224,
++ SpvOpMemoryBarrier = 225,
++ SpvOpAtomicLoad = 227,
++ SpvOpAtomicStore = 228,
++ SpvOpAtomicExchange = 229,
++ SpvOpAtomicCompareExchange = 230,
++ SpvOpAtomicCompareExchangeWeak = 231,
++ SpvOpAtomicIIncrement = 232,
++ SpvOpAtomicIDecrement = 233,
++ SpvOpAtomicIAdd = 234,
++ SpvOpAtomicISub = 235,
++ SpvOpAtomicSMin = 236,
++ SpvOpAtomicUMin = 237,
++ SpvOpAtomicSMax = 238,
++ SpvOpAtomicUMax = 239,
++ SpvOpAtomicAnd = 240,
++ SpvOpAtomicOr = 241,
++ SpvOpAtomicXor = 242,
++ SpvOpPhi = 245,
++ SpvOpLoopMerge = 246,
++ SpvOpSelectionMerge = 247,
++ SpvOpLabel = 248,
++ SpvOpBranch = 249,
++ SpvOpBranchConditional = 250,
++ SpvOpSwitch = 251,
++ SpvOpKill = 252,
++ SpvOpReturn = 253,
++ SpvOpReturnValue = 254,
++ SpvOpUnreachable = 255,
++ SpvOpLifetimeStart = 256,
++ SpvOpLifetimeStop = 257,
++ SpvOpGroupAsyncCopy = 259,
++ SpvOpGroupWaitEvents = 260,
++ SpvOpGroupAll = 261,
++ SpvOpGroupAny = 262,
++ SpvOpGroupBroadcast = 263,
++ SpvOpGroupIAdd = 264,
++ SpvOpGroupFAdd = 265,
++ SpvOpGroupFMin = 266,
++ SpvOpGroupUMin = 267,
++ SpvOpGroupSMin = 268,
++ SpvOpGroupFMax = 269,
++ SpvOpGroupUMax = 270,
++ SpvOpGroupSMax = 271,
++ SpvOpReadPipe = 274,
++ SpvOpWritePipe = 275,
++ SpvOpReservedReadPipe = 276,
++ SpvOpReservedWritePipe = 277,
++ SpvOpReserveReadPipePackets = 278,
++ SpvOpReserveWritePipePackets = 279,
++ SpvOpCommitReadPipe = 280,
++ SpvOpCommitWritePipe = 281,
++ SpvOpIsValidReserveId = 282,
++ SpvOpGetNumPipePackets = 283,
++ SpvOpGetMaxPipePackets = 284,
++ SpvOpGroupReserveReadPipePackets = 285,
++ SpvOpGroupReserveWritePipePackets = 286,
++ SpvOpGroupCommitReadPipe = 287,
++ SpvOpGroupCommitWritePipe = 288,
++ SpvOpEnqueueMarker = 291,
++ SpvOpEnqueueKernel = 292,
++ SpvOpGetKernelNDrangeSubGroupCount = 293,
++ SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
++ SpvOpGetKernelWorkGroupSize = 295,
++ SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
++ SpvOpRetainEvent = 297,
++ SpvOpReleaseEvent = 298,
++ SpvOpCreateUserEvent = 299,
++ SpvOpIsValidEvent = 300,
++ SpvOpSetUserEventStatus = 301,
++ SpvOpCaptureEventProfilingInfo = 302,
++ SpvOpGetDefaultQueue = 303,
++ SpvOpBuildNDRange = 304,
++ SpvOpImageSparseSampleImplicitLod = 305,
++ SpvOpImageSparseSampleExplicitLod = 306,
++ SpvOpImageSparseSampleDrefImplicitLod = 307,
++ SpvOpImageSparseSampleDrefExplicitLod = 308,
++ SpvOpImageSparseSampleProjImplicitLod = 309,
++ SpvOpImageSparseSampleProjExplicitLod = 310,
++ SpvOpImageSparseSampleProjDrefImplicitLod = 311,
++ SpvOpImageSparseSampleProjDrefExplicitLod = 312,
++ SpvOpImageSparseFetch = 313,
++ SpvOpImageSparseGather = 314,
++ SpvOpImageSparseDrefGather = 315,
++ SpvOpImageSparseTexelsResident = 316,
++ SpvOpNoLine = 317,
++ SpvOpAtomicFlagTestAndSet = 318,
++ SpvOpAtomicFlagClear = 319,
++} SpvOp;
++
++#endif // #ifndef spirv_H
++
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++#include "nir/nir_vla.h"
++#include "nir/nir_control_flow.h"
++#include "nir/nir_constant_expressions.h"
++
++static struct vtn_ssa_value *
++vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
++{
++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
++ val->type = type;
++
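++ /* Scalars and vectors get a single ssa_undef instruction; matrices,
++  * arrays, and structs are built recursively, one element at a time.
++  */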
++ if (glsl_type_is_vector_or_scalar(type)) {
++ unsigned num_components = glsl_get_vector_elements(val->type);
++ nir_ssa_undef_instr *undef =
++ nir_ssa_undef_instr_create(b->shader, num_components);
++
++ nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr);
++ val->def = &undef->def;
++ } else {
++ unsigned elems = glsl_get_length(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ if (glsl_type_is_matrix(type)) {
++ const struct glsl_type *elem_type =
++ glsl_vector_type(glsl_get_base_type(type),
++ glsl_get_vector_elements(type));
++
++ for (unsigned i = 0; i < elems; i++)
++ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
++ } else if (glsl_type_is_array(type)) {
++ const struct glsl_type *elem_type = glsl_get_array_element(type);
++ for (unsigned i = 0; i < elems; i++)
++ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
++ } else {
++ for (unsigned i = 0; i < elems; i++) {
++ const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
++ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
++ }
++ }
++ }
++
++ return val;
++}
++
++static struct vtn_ssa_value *
++vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
++ const struct glsl_type *type)
++{
++ struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
++
++ if (entry)
++ return entry->data;
++
++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
++ val->type = type;
++
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ if (glsl_type_is_vector_or_scalar(type)) {
++ unsigned num_components = glsl_get_vector_elements(val->type);
++ nir_load_const_instr *load =
++ nir_load_const_instr_create(b->shader, num_components);
++
++ for (unsigned i = 0; i < num_components; i++)
++ load->value.u[i] = constant->value.u[i];
++
++ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
++ val->def = &load->def;
++ } else {
++ assert(glsl_type_is_matrix(type));
++ unsigned rows = glsl_get_vector_elements(val->type);
++ unsigned columns = glsl_get_matrix_columns(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
++
++ for (unsigned i = 0; i < columns; i++) {
++ struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
++ col_val->type = glsl_get_column_type(val->type);
++ nir_load_const_instr *load =
++ nir_load_const_instr_create(b->shader, rows);
++
++ for (unsigned j = 0; j < rows; j++)
++ load->value.u[j] = constant->value.u[rows * i + j];
++
++ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
++ col_val->def = &load->def;
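++/* Converts a SPIR-V module (word_count 32-bit words starting at words) to
++ * NIR and returns the nir_function for the named entry point of the given
++ * stage, with the supplied specialization-constant overrides applied.
++ */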
++
++ val->elems[i] = col_val;
++ }
++ }
++ break;
++
++ case GLSL_TYPE_ARRAY: {
++ unsigned elems = glsl_get_length(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ const struct glsl_type *elem_type = glsl_get_array_element(val->type);
++ for (unsigned i = 0; i < elems; i++)
++ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
++ elem_type);
++ break;
++ }
++
++ case GLSL_TYPE_STRUCT: {
++ unsigned elems = glsl_get_length(val->type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++) {
++ const struct glsl_type *elem_type =
++ glsl_get_struct_field(val->type, i);
++ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
++ elem_type);
++ }
++ break;
++ }
++
++ default:
++ unreachable("bad constant type");
++ }
++
++ return val;
++}
++
++struct vtn_ssa_value *
++vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
++{
++ struct vtn_value *val = vtn_untyped_value(b, value_id);
++ switch (val->value_type) {
++ case vtn_value_type_undef:
++ return vtn_undef_ssa_value(b, val->type->type);
++
++ case vtn_value_type_constant:
++ return vtn_const_ssa_value(b, val->constant, val->const_type);
++
++ case vtn_value_type_ssa:
++ return val->ssa;
++
++ case vtn_value_type_access_chain:
++ /* This is needed for function parameters */
++ return vtn_variable_load(b, val->access_chain);
++
++ default:
++ unreachable("Invalid type for an SSA value");
++ }
++}
++
++static char *
++vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
++ unsigned word_count, unsigned *words_used)
++{
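++ /* SPIR-V packs the UTF-8 string, including its NUL terminator, into
++  * consecutive 32-bit words, so at most word_count * sizeof(*words) bytes
++  * are meaningful here.
++  */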
++ char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words));
++ if (words_used) {
++ /* Amount of space taken by the string (including the null) */
++ unsigned len = strlen(dup) + 1;
++ *words_used = DIV_ROUND_UP(len, sizeof(*words));
++ }
++ return dup;
++}
++
++const uint32_t *
++vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
++ const uint32_t *end, vtn_instruction_handler handler)
++{
++ b->file = NULL;
++ b->line = -1;
++ b->col = -1;
++
++ const uint32_t *w = start;
++ while (w < end) {
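++ /* The first word of every SPIR-V instruction packs the opcode into its
++  * low 16 bits and the total word count (including this word) into its
++  * high 16 bits.
++  */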
++ SpvOp opcode = w[0] & SpvOpCodeMask;
++ unsigned count = w[0] >> SpvWordCountShift;
++ assert(count >= 1 && w + count <= end);
++
++ switch (opcode) {
++ case SpvOpNop:
++ break; /* Do nothing */
++
++ case SpvOpLine:
++ b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
++ b->line = w[2];
++ b->col = w[3];
++ break;
++
++ case SpvOpNoLine:
++ b->file = NULL;
++ b->line = -1;
++ b->col = -1;
++ break;
++
++ default:
++ if (!handler(b, opcode, w, count))
++ return w;
++ break;
++ }
++
++ w += count;
++ }
++ assert(w == end);
++ return w;
++}
++
++static void
++vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpExtInstImport: {
++ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
++ if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
++ val->ext_handler = vtn_handle_glsl450_instruction;
++ } else {
++ assert(!"Unsupported extension");
++ }
++ break;
++ }
++
++ case SpvOpExtInst: {
++ struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
++ bool handled = val->ext_handler(b, w[4], w, count);
++ (void)handled;
++ assert(handled);
++ break;
++ }
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
++
++static void
++_foreach_decoration_helper(struct vtn_builder *b,
++ struct vtn_value *base_value,
++ int parent_member,
++ struct vtn_value *value,
++ vtn_decoration_foreach_cb cb, void *data)
++{
++ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
++ int member;
++ if (dec->scope == VTN_DEC_DECORATION) {
++ member = parent_member;
++ } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
++ assert(parent_member == -1);
++ member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
++ } else {
++ /* Not a decoration */
++ continue;
++ }
++
++ if (dec->group) {
++ assert(dec->group->value_type == vtn_value_type_decoration_group);
++ _foreach_decoration_helper(b, base_value, member, dec->group,
++ cb, data);
++ } else {
++ cb(b, base_value, member, dec, data);
++ }
++ }
++}
++
++/** Iterates (recursively if needed) over all of the decorations on a value
++ *
++ * This function iterates over all of the decorations applied to a given
++ * value. If it encounters a decoration group, it recurses into the group
++ * and iterates over all of those decorations as well.
++ */
++void
++vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
++ vtn_decoration_foreach_cb cb, void *data)
++{
++ _foreach_decoration_helper(b, value, -1, value, cb, data);
++}
++
++void
++vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
++ vtn_execution_mode_foreach_cb cb, void *data)
++{
++ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
++ if (dec->scope != VTN_DEC_EXECUTION_MODE)
++ continue;
++
++ assert(dec->group == NULL);
++ cb(b, value, dec, data);
++ }
++}
++
++static void
++vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ const uint32_t *w_end = w + count;
++ const uint32_t target = w[1];
++ w += 2;
++
++ switch (opcode) {
++ case SpvOpDecorationGroup:
++ vtn_push_value(b, target, vtn_value_type_decoration_group);
++ break;
++
++ case SpvOpDecorate:
++ case SpvOpMemberDecorate:
++ case SpvOpExecutionMode: {
++ struct vtn_value *val = &b->values[target];
++
++ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
++ switch (opcode) {
++ case SpvOpDecorate:
++ dec->scope = VTN_DEC_DECORATION;
++ break;
++ case SpvOpMemberDecorate:
++ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
++ break;
++ case SpvOpExecutionMode:
++ dec->scope = VTN_DEC_EXECUTION_MODE;
++ break;
++ default:
++ unreachable("Invalid decoration opcode");
++ }
++ dec->decoration = *(w++);
++ dec->literals = w;
++
++ /* Link into the list */
++ dec->next = val->decoration;
++ val->decoration = dec;
++ break;
++ }
++
++ case SpvOpGroupMemberDecorate:
++ case SpvOpGroupDecorate: {
++ struct vtn_value *group =
++ vtn_value(b, target, vtn_value_type_decoration_group);
++
++ for (; w < w_end; w++) {
++ struct vtn_value *val = vtn_untyped_value(b, *w);
++ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
++
++ dec->group = group;
++ if (opcode == SpvOpGroupDecorate) {
++ dec->scope = VTN_DEC_DECORATION;
++ } else {
++ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
++ }
++
++ /* Link into the list */
++ dec->next = val->decoration;
++ val->decoration = dec;
++ }
++ break;
++ }
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
++
++struct member_decoration_ctx {
++ struct glsl_struct_field *fields;
++ struct vtn_type *type;
++};
++
++/* does a shallow copy of a vtn_type */
++
++static struct vtn_type *
++vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
++{
++ struct vtn_type *dest = ralloc(b, struct vtn_type);
++ dest->type = src->type;
++ dest->is_builtin = src->is_builtin;
++ if (src->is_builtin)
++ dest->builtin = src->builtin;
++
++ if (!glsl_type_is_scalar(src->type)) {
++ switch (glsl_get_base_type(src->type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ case GLSL_TYPE_ARRAY:
++ dest->row_major = src->row_major;
++ dest->stride = src->stride;
++ dest->array_element = src->array_element;
++ break;
++
++ case GLSL_TYPE_STRUCT: {
++ unsigned elems = glsl_get_length(src->type);
++
++ dest->members = ralloc_array(b, struct vtn_type *, elems);
++ memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *));
++
++ dest->offsets = ralloc_array(b, unsigned, elems);
++ memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned));
++ break;
++ }
++
++ default:
++ unreachable("unhandled type");
++ }
++ }
++
++ return dest;
++}
++
++static struct vtn_type *
++mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
++{
++ type->members[member] = vtn_type_copy(b, type->members[member]);
++ type = type->members[member];
++
++ /* We may have an array of matrices.... Oh, joy! */
++ while (glsl_type_is_array(type->type)) {
++ type->array_element = vtn_type_copy(b, type->array_element);
++ type = type->array_element;
++ }
++
++ assert(glsl_type_is_matrix(type->type));
++
++ return type;
++}
++
++static void
++struct_member_decoration_cb(struct vtn_builder *b,
++ struct vtn_value *val, int member,
++ const struct vtn_decoration *dec, void *void_ctx)
++{
++ struct member_decoration_ctx *ctx = void_ctx;
++
++ if (member < 0)
++ return;
++
++ switch (dec->decoration) {
++ case SpvDecorationRelaxedPrecision:
++ break; /* FIXME: Do nothing with this for now. */
++ case SpvDecorationNoPerspective:
++ ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
++ break;
++ case SpvDecorationFlat:
++ ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT;
++ break;
++ case SpvDecorationCentroid:
++ ctx->fields[member].centroid = true;
++ break;
++ case SpvDecorationSample:
++ ctx->fields[member].sample = true;
++ break;
++ case SpvDecorationLocation:
++ ctx->fields[member].location = dec->literals[0];
++ break;
++ case SpvDecorationBuiltIn:
++ ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
++ ctx->type->members[member]->is_builtin = true;
++ ctx->type->members[member]->builtin = dec->literals[0];
++ ctx->type->builtin_block = true;
++ break;
++ case SpvDecorationOffset:
++ ctx->type->offsets[member] = dec->literals[0];
++ break;
++ case SpvDecorationMatrixStride:
++ mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0];
++ break;
++ case SpvDecorationColMajor:
++ break; /* Nothing to do here. Column-major is the default. */
++ case SpvDecorationRowMajor:
++ mutable_matrix_member(b, ctx->type, member)->row_major = true;
++ break;
++ default:
++ unreachable("Unhandled member decoration");
++ }
++}
++
++static void
++type_decoration_cb(struct vtn_builder *b,
++ struct vtn_value *val, int member,
++ const struct vtn_decoration *dec, void *ctx)
++{
++ struct vtn_type *type = val->type;
++
++ if (member != -1)
++ return;
++
++ switch (dec->decoration) {
++ case SpvDecorationArrayStride:
++ type->stride = dec->literals[0];
++ break;
++ case SpvDecorationBlock:
++ type->block = true;
++ break;
++ case SpvDecorationBufferBlock:
++ type->buffer_block = true;
++ break;
++ case SpvDecorationGLSLShared:
++ case SpvDecorationGLSLPacked:
++ /* Ignore these, since we get explicit offsets anyways */
++ break;
++
++ case SpvDecorationStream:
++ assert(dec->literals[0] == 0);
++ break;
++
++ default:
++ unreachable("Unhandled type decoration");
++ }
++}
++
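++/* Maps a SPIR-V image format onto the numeric value of the corresponding GL
++ * internal format enum; the result is stored in vtn_type::image_format.
++ */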
++static unsigned
++translate_image_format(SpvImageFormat format)
++{
++ switch (format) {
++ case SpvImageFormatUnknown: return 0; /* GL_NONE */
++ case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */
++ case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */
++ case SpvImageFormatR32f: return 0x822E; /* GL_R32F */
++ case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */
++ case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */
++ case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */
++ case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */
++ case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */
++ case SpvImageFormatR16f: return 0x822D; /* GL_R16F */
++ case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */
++ case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */
++ case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */
++ case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */
++ case SpvImageFormatR16: return 0x822A; /* GL_R16 */
++ case SpvImageFormatR8: return 0x8229; /* GL_R8 */
++ case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */
++ case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */
++ case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */
++ case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */
++ case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */
++ case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */
++ case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */
++ case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */
++ case SpvImageFormatR32i: return 0x8235; /* GL_R32I */
++ case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */
++ case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */
++ case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */
++ case SpvImageFormatR16i: return 0x8233; /* GL_R16I */
++ case SpvImageFormatR8i: return 0x8231; /* GL_R8I */
++ case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */
++ case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */
++ case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */
++ case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */
++ case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */
++ case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */
++ case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */
++ case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */
++ case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI */
++ case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */
++ default:
++ assert(!"Invalid image format");
++ return 0;
++ }
++}
++
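++/* Builds the vtn_type (and underlying glsl_type) for an OpType* instruction
++ * and then applies any type decorations via type_decoration_cb.
++ */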
++static void
++vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type);
++
++ val->type = rzalloc(b, struct vtn_type);
++ val->type->is_builtin = false;
++ val->type->val = val;
++
++ switch (opcode) {
++ case SpvOpTypeVoid:
++ val->type->type = glsl_void_type();
++ break;
++ case SpvOpTypeBool:
++ val->type->type = glsl_bool_type();
++ break;
++ case SpvOpTypeInt:
++ val->type->type = glsl_int_type();
++ break;
++ case SpvOpTypeFloat:
++ val->type->type = glsl_float_type();
++ break;
++
++ case SpvOpTypeVector: {
++ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
++ unsigned elems = w[3];
++
++ assert(glsl_type_is_scalar(base->type));
++ val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
++
++ /* Vectors implicitly have sizeof(base_type) stride. For now, this
++ * is always 4 bytes. This will have to change if we want to start
++ * supporting doubles or half-floats.
++ */
++ val->type->stride = 4;
++ val->type->array_element = base;
++ break;
++ }
++
++ case SpvOpTypeMatrix: {
++ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
++ unsigned columns = w[3];
++
++ assert(glsl_type_is_vector(base->type));
++ val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
++ glsl_get_vector_elements(base->type),
++ columns);
++ assert(!glsl_type_is_error(val->type->type));
++ val->type->array_element = base;
++ val->type->row_major = false;
++ val->type->stride = 0;
++ break;
++ }
++
++ case SpvOpTypeRuntimeArray:
++ case SpvOpTypeArray: {
++ struct vtn_type *array_element =
++ vtn_value(b, w[2], vtn_value_type_type)->type;
++
++ unsigned length;
++ if (opcode == SpvOpTypeRuntimeArray) {
++ /* A length of 0 is used to denote unsized arrays */
++ length = 0;
++ } else {
++ length =
++ vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0];
++ }
++
++ val->type->type = glsl_array_type(array_element->type, length);
++ val->type->array_element = array_element;
++ val->type->stride = 0;
++ break;
++ }
++
++ case SpvOpTypeStruct: {
++ unsigned num_fields = count - 2;
++ val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
++ val->type->offsets = ralloc_array(b, unsigned, num_fields);
++
++ NIR_VLA(struct glsl_struct_field, fields, count);
++ for (unsigned i = 0; i < num_fields; i++) {
++ val->type->members[i] =
++ vtn_value(b, w[i + 2], vtn_value_type_type)->type;
++ fields[i] = (struct glsl_struct_field) {
++ .type = val->type->members[i]->type,
++ .name = ralloc_asprintf(b, "field%d", i),
++ .location = -1,
++ };
++ }
++
++ struct member_decoration_ctx ctx = {
++ .fields = fields,
++ .type = val->type
++ };
++
++ vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
++
++ const char *name = val->name ? val->name : "struct";
++
++ val->type->type = glsl_struct_type(fields, num_fields, name);
++ break;
++ }
++
++ case SpvOpTypeFunction: {
++ const struct glsl_type *return_type =
++ vtn_value(b, w[2], vtn_value_type_type)->type->type;
++ NIR_VLA(struct glsl_function_param, params, count - 3);
++ for (unsigned i = 0; i < count - 3; i++) {
++ params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type;
++
++ /* FIXME: */
++ params[i].in = true;
++ params[i].out = true;
++ }
++ val->type->type = glsl_function_type(return_type, params, count - 3);
++ break;
++ }
++
++ case SpvOpTypePointer:
++ /* FIXME: For now, we'll just do the really lame thing and return
++ * the same type. The validator should ensure that the proper number
++ * of dereferences happen
++ */
++ val->type = vtn_value(b, w[3], vtn_value_type_type)->type;
++ break;
++
++ case SpvOpTypeImage: {
++ const struct glsl_type *sampled_type =
++ vtn_value(b, w[2], vtn_value_type_type)->type->type;
++
++ assert(glsl_type_is_vector_or_scalar(sampled_type));
++
++ enum glsl_sampler_dim dim;
++ switch ((SpvDim)w[3]) {
++ case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
++ case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
++ case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
++ case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
++ case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
++ case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
++ default:
++ unreachable("Invalid SPIR-V Sampler dimension");
++ }
++
++ bool is_shadow = w[4];
++ bool is_array = w[5];
++ bool multisampled = w[6];
++ unsigned sampled = w[7];
++ SpvImageFormat format = w[8];
++
++ if (count > 9)
++ val->type->access_qualifier = w[9];
++ else
++ val->type->access_qualifier = SpvAccessQualifierReadWrite;
++
++ assert(!multisampled && "FIXME: Handle multi-sampled textures");
++
++ val->type->image_format = translate_image_format(format);
++
++ if (sampled == 1) {
++ val->type->type = glsl_sampler_type(dim, is_shadow, is_array,
++ glsl_get_base_type(sampled_type));
++ } else if (sampled == 2) {
++ assert(format);
++ assert(!is_shadow);
++ val->type->type = glsl_image_type(dim, is_array,
++ glsl_get_base_type(sampled_type));
++ } else {
++ assert(!"We need to know if the image will be sampled");
++ }
++ break;
++ }
++
++ case SpvOpTypeSampledImage:
++ val->type = vtn_value(b, w[2], vtn_value_type_type)->type;
++ break;
++
++ case SpvOpTypeSampler:
++ /* The actual sampler type here doesn't really matter. It gets
++ * thrown away the moment you combine it with an image. What really
++ * matters is that it's a sampler type as opposed to an integer type
++ * so the backend knows what to do.
++ *
++ * TODO: Eventually we should consider adding a "bare sampler" type
++ * to glsl_types.
++ */
++ val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false,
++ GLSL_TYPE_FLOAT);
++ break;
++
++ case SpvOpTypeOpaque:
++ case SpvOpTypeEvent:
++ case SpvOpTypeDeviceEvent:
++ case SpvOpTypeReserveId:
++ case SpvOpTypeQueue:
++ case SpvOpTypePipe:
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
++}
++
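++/* Recursively builds a zero-initialized nir_constant for OpConstantNull. */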
++static nir_constant *
++vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type)
++{
++ nir_constant *c = rzalloc(b, nir_constant);
++
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ /* Nothing to do here. It's already initialized to zero */
++ break;
++
++ case GLSL_TYPE_ARRAY:
++ assert(glsl_get_length(type) > 0);
++ c->num_elements = glsl_get_length(type);
++ c->elements = ralloc_array(b, nir_constant *, c->num_elements);
++
++ c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type));
++ for (unsigned i = 1; i < c->num_elements; i++)
++ c->elements[i] = c->elements[0];
++ break;
++
++ case GLSL_TYPE_STRUCT:
++ c->num_elements = glsl_get_length(type);
++ c->elements = ralloc_array(b, nir_constant *, c->num_elements);
++
++ for (unsigned i = 0; i < c->num_elements; i++) {
++ c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i));
++ }
++ break;
++
++ default:
++ unreachable("Invalid type for null constant");
++ }
++
++ return c;
++}
++
++static void
++spec_constant_decoration_cb(struct vtn_builder *b, struct vtn_value *v,
++ int member, const struct vtn_decoration *dec,
++ void *data)
++{
++ assert(member == -1);
++ if (dec->decoration != SpvDecorationSpecId)
++ return;
++
++ uint32_t *const_value = data;
++
++ for (unsigned i = 0; i < b->num_specializations; i++) {
++ if (b->specializations[i].id == dec->literals[0]) {
++ *const_value = b->specializations[i].data;
++ return;
++ }
++ }
++}
++
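++/* Returns const_value unless a SpecId decoration on the value matches one of
++ * the specializations supplied to the builder, in which case that
++ * specialization data is returned instead.
++ */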
++static uint32_t
++get_specialization(struct vtn_builder *b, struct vtn_value *val,
++ uint32_t const_value)
++{
++ vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &const_value);
++ return const_value;
++}
++
++static void
++vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
++ val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ val->constant = rzalloc(b, nir_constant);
++ switch (opcode) {
++ case SpvOpConstantTrue:
++ assert(val->const_type == glsl_bool_type());
++ val->constant->value.u[0] = NIR_TRUE;
++ break;
++ case SpvOpConstantFalse:
++ assert(val->const_type == glsl_bool_type());
++ val->constant->value.u[0] = NIR_FALSE;
++ break;
++
++ case SpvOpSpecConstantTrue:
++ case SpvOpSpecConstantFalse: {
++ assert(val->const_type == glsl_bool_type());
++ uint32_t int_val =
++ get_specialization(b, val, (opcode == SpvOpSpecConstantTrue));
++ val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE;
++ break;
++ }
++
++ case SpvOpConstant:
++ assert(glsl_type_is_scalar(val->const_type));
++ val->constant->value.u[0] = w[3];
++ break;
++ case SpvOpSpecConstant:
++ assert(glsl_type_is_scalar(val->const_type));
++ val->constant->value.u[0] = get_specialization(b, val, w[3]);
++ break;
++ case SpvOpSpecConstantComposite:
++ case SpvOpConstantComposite: {
++ unsigned elem_count = count - 3;
++ nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
++ for (unsigned i = 0; i < elem_count; i++)
++ elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant;
++
++ switch (glsl_get_base_type(val->const_type)) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ if (glsl_type_is_matrix(val->const_type)) {
++ unsigned rows = glsl_get_vector_elements(val->const_type);
++ assert(glsl_get_matrix_columns(val->const_type) == elem_count);
++ for (unsigned i = 0; i < elem_count; i++)
++ for (unsigned j = 0; j < rows; j++)
++ val->constant->value.u[rows * i + j] = elems[i]->value.u[j];
++ } else {
++ assert(glsl_type_is_vector(val->const_type));
++ assert(glsl_get_vector_elements(val->const_type) == elem_count);
++ for (unsigned i = 0; i < elem_count; i++)
++ val->constant->value.u[i] = elems[i]->value.u[0];
++ }
++ ralloc_free(elems);
++ break;
++
++ case GLSL_TYPE_STRUCT:
++ case GLSL_TYPE_ARRAY:
++ ralloc_steal(val->constant, elems);
++ val->constant->num_elements = elem_count;
++ val->constant->elements = elems;
++ break;
++
++ default:
++ unreachable("Unsupported type for constants");
++ }
++ break;
++ }
++
++ case SpvOpSpecConstantOp: {
++ SpvOp opcode = get_specialization(b, val, w[3]);
++ switch (opcode) {
++ case SpvOpVectorShuffle: {
++ struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
++ struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
++ unsigned len0 = glsl_get_vector_elements(v0->const_type);
++ unsigned len1 = glsl_get_vector_elements(v1->const_type);
++
++ uint32_t u[8];
++ for (unsigned i = 0; i < len0; i++)
++ u[i] = v0->constant->value.u[i];
++ for (unsigned i = 0; i < len1; i++)
++ u[len0 + i] = v1->constant->value.u[i];
++
++ for (unsigned i = 0; i < count - 6; i++) {
++ uint32_t comp = w[i + 6];
++ if (comp == (uint32_t)-1) {
++ val->constant->value.u[i] = 0xdeadbeef;
++ } else {
++ val->constant->value.u[i] = u[comp];
++ }
++ }
++ return;
++ }
++
++ case SpvOpCompositeExtract:
++ case SpvOpCompositeInsert: {
++ struct vtn_value *comp;
++ unsigned deref_start;
++ struct nir_constant **c;
++ if (opcode == SpvOpCompositeExtract) {
++ comp = vtn_value(b, w[4], vtn_value_type_constant);
++ deref_start = 5;
++ c = &comp->constant;
++ } else {
++ comp = vtn_value(b, w[5], vtn_value_type_constant);
++ deref_start = 6;
++ val->constant = nir_constant_clone(comp->constant,
++ (nir_variable *)b);
++ c = &val->constant;
++ }
++
++ int elem = -1;
++ const struct glsl_type *type = comp->const_type;
++ for (unsigned i = deref_start; i < count; i++) {
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* If we hit this granularity, we're picking off an element */
++ if (elem < 0)
++ elem = 0;
++
++ if (glsl_type_is_matrix(type)) {
++ elem += w[i] * glsl_get_vector_elements(type);
++ type = glsl_get_column_type(type);
++ } else {
++ assert(glsl_type_is_vector(type));
++ elem += w[i];
++ type = glsl_scalar_type(glsl_get_base_type(type));
++ }
++ continue;
++
++ case GLSL_TYPE_ARRAY:
++ c = &(*c)->elements[w[i]];
++ type = glsl_get_array_element(type);
++ continue;
++
++ case GLSL_TYPE_STRUCT:
++ c = &(*c)->elements[w[i]];
++ type = glsl_get_struct_field(type, w[i]);
++ continue;
++
++ default:
++ unreachable("Invalid constant type");
++ }
++ }
++
++ if (opcode == SpvOpCompositeExtract) {
++ if (elem == -1) {
++ val->constant = *c;
++ } else {
++ unsigned num_components = glsl_get_vector_elements(type);
++ for (unsigned i = 0; i < num_components; i++)
++ val->constant->value.u[i] = (*c)->value.u[elem + i];
++ }
++ } else {
++ struct vtn_value *insert =
++ vtn_value(b, w[4], vtn_value_type_constant);
++ assert(insert->const_type == type);
++ if (elem == -1) {
++ *c = insert->constant;
++ } else {
++ unsigned num_components = glsl_get_vector_elements(type);
++ for (unsigned i = 0; i < num_components; i++)
++ (*c)->value.u[elem + i] = insert->constant->value.u[i];
++ }
++ }
++ return;
++ }
++
++ default: {
++ bool swap;
++ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
++
++ unsigned num_components = glsl_get_vector_elements(val->const_type);
++
++ nir_const_value src[3];
++ assert(count <= 7);
++ for (unsigned i = 0; i < count - 4; i++) {
++ nir_constant *c =
++ vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
++
++ unsigned j = swap ? 1 - i : i;
++ for (unsigned k = 0; k < num_components; k++)
++ src[j].u[k] = c->value.u[k];
++ }
++
++ nir_const_value res = nir_eval_const_opcode(op, num_components, src);
++
++ for (unsigned k = 0; k < num_components; k++)
++ val->constant->value.u[k] = res.u[k];
++
++ return;
++ } /* default */
++ }
++ }
++
++ case SpvOpConstantNull:
++ val->constant = vtn_null_constant(b, val->const_type);
++ break;
++
++ case SpvOpConstantSampler:
++ assert(!"OpConstantSampler requires Kernel Capability");
++ break;
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
++
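++/* Lowers OpFunctionCall to a nir_call_instr.  Pointer arguments are passed
++ * as variable dereferences, SSA arguments are spilled to local temporaries,
++ * and a non-void return value is read back out of a temporary.
++ */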
++static void
++vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct nir_function *callee =
++ vtn_value(b, w[3], vtn_value_type_function)->func->impl->function;
++
++ nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
++ for (unsigned i = 0; i < call->num_params; i++) {
++ unsigned arg_id = w[4 + i];
++ struct vtn_value *arg = vtn_untyped_value(b, arg_id);
++ if (arg->value_type == vtn_value_type_access_chain) {
++ nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain);
++ call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref));
++ } else {
++ struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
++
++ /* Make a temporary to store the argument in */
++ nir_variable *tmp =
++ nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp");
++ call->params[i] = nir_deref_var_create(call, tmp);
++
++ vtn_local_store(b, arg_ssa, call->params[i]);
++ }
++ }
++
++ nir_variable *out_tmp = NULL;
++ if (!glsl_type_is_void(callee->return_type)) {
++ out_tmp = nir_local_variable_create(b->impl, callee->return_type,
++ "out_tmp");
++ call->return_deref = nir_deref_var_create(call, out_tmp);
++ }
++
++ nir_builder_instr_insert(&b->nb, &call->instr);
++
++ if (glsl_type_is_void(callee->return_type)) {
++ vtn_push_value(b, w[2], vtn_value_type_undef);
++ } else {
++ struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ retval->ssa = vtn_local_load(b, call->return_deref);
++ }
++}
++
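++/* Allocates a vtn_ssa_value tree for the given type: vectors and scalars are
++ * leaves, while matrices, arrays, and structs get one element per column,
++ * array element, or member.
++ */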
++struct vtn_ssa_value *
++vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
++{
++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
++ val->type = type;
++
++ if (!glsl_type_is_vector_or_scalar(type)) {
++ unsigned elems = glsl_get_length(type);
++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++) {
++ const struct glsl_type *child_type;
++
++ switch (glsl_get_base_type(type)) {
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ child_type = glsl_get_column_type(type);
++ break;
++ case GLSL_TYPE_ARRAY:
++ child_type = glsl_get_array_element(type);
++ break;
++ case GLSL_TYPE_STRUCT:
++ child_type = glsl_get_struct_field(type, i);
++ break;
++ default:
++ unreachable("unknown base type");
++ }
++
++ val->elems[i] = vtn_create_ssa_value(b, child_type);
++ }
++ }
++
++ return val;
++}
++
++static nir_tex_src
++vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
++{
++ nir_tex_src src;
++ src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def);
++ src.src_type = type;
++ return src;
++}
++
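++/* Handles OpSampledImage, OpImage, and the image sampling/query opcodes,
++ * building a nir_tex_instr from the coordinate, depth-comparison value, LOD,
++ * and any optional image operands.
++ */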
++static void
++vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ if (opcode == SpvOpSampledImage) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_sampled_image);
++ val->sampled_image = ralloc(b, struct vtn_sampled_image);
++ val->sampled_image->image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ val->sampled_image->sampler =
++ vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain;
++ return;
++ } else if (opcode == SpvOpImage) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++ struct vtn_value *src_val = vtn_untyped_value(b, w[3]);
++ if (src_val->value_type == vtn_value_type_sampled_image) {
++ val->access_chain = src_val->sampled_image->image;
++ } else {
++ assert(src_val->value_type == vtn_value_type_access_chain);
++ val->access_chain = src_val->access_chain;
++ }
++ return;
++ }
++
++ struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++
++ struct vtn_sampled_image sampled;
++ struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
++ if (sampled_val->value_type == vtn_value_type_sampled_image) {
++ sampled = *sampled_val->sampled_image;
++ } else {
++ assert(sampled_val->value_type == vtn_value_type_access_chain);
++ sampled.image = NULL;
++ sampled.sampler = sampled_val->access_chain;
++ }
++
++ nir_tex_src srcs[8]; /* 8 should be enough */
++ nir_tex_src *p = srcs;
++
++ unsigned idx = 4;
++
++ bool has_coord = false;
++ switch (opcode) {
++ case SpvOpImageSampleImplicitLod:
++ case SpvOpImageSampleExplicitLod:
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjImplicitLod:
++ case SpvOpImageSampleProjExplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ case SpvOpImageFetch:
++ case SpvOpImageGather:
++ case SpvOpImageDrefGather:
++ case SpvOpImageQueryLod: {
++ /* All these types have the coordinate as their first real argument */
++ struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
++ has_coord = true;
++ p->src = nir_src_for_ssa(coord->def);
++ p->src_type = nir_tex_src_coord;
++ p++;
++ break;
++ }
++
++ default:
++ break;
++ }
++
++ /* These all have an explicit depth value as their next source */
++ switch (opcode) {
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
++ break;
++ default:
++ break;
++ }
++
++ /* For OpImageQuerySizeLod, we always have an LOD */
++ if (opcode == SpvOpImageQuerySizeLod)
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
++
++ /* Figure out the base texture operation */
++ nir_texop texop;
++ switch (opcode) {
++ case SpvOpImageSampleImplicitLod:
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleProjImplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ texop = nir_texop_tex;
++ break;
++
++ case SpvOpImageSampleExplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjExplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ texop = nir_texop_txl;
++ break;
++
++ case SpvOpImageFetch:
++ texop = nir_texop_txf;
++ break;
++
++ case SpvOpImageGather:
++ case SpvOpImageDrefGather:
++ texop = nir_texop_tg4;
++ break;
++
++ case SpvOpImageQuerySizeLod:
++ case SpvOpImageQuerySize:
++ texop = nir_texop_txs;
++ break;
++
++ case SpvOpImageQueryLod:
++ texop = nir_texop_lod;
++ break;
++
++ case SpvOpImageQueryLevels:
++ texop = nir_texop_query_levels;
++ break;
++
++ case SpvOpImageQuerySamples:
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ /* Now we need to handle some number of optional arguments */
++ if (idx < count) {
++ uint32_t operands = w[idx++];
++
++ if (operands & SpvImageOperandsBiasMask) {
++ assert(texop == nir_texop_tex);
++ texop = nir_texop_txb;
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias);
++ }
++
++ if (operands & SpvImageOperandsLodMask) {
++ assert(texop == nir_texop_txl || texop == nir_texop_txf ||
++ texop == nir_texop_txs);
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
++ }
++
++ if (operands & SpvImageOperandsGradMask) {
++ assert(texop == nir_texop_tex);
++ texop = nir_texop_txd;
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
++ }
++
++ if (operands & SpvImageOperandsOffsetMask ||
++ operands & SpvImageOperandsConstOffsetMask)
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset);
++
++ if (operands & SpvImageOperandsConstOffsetsMask)
++ assert(!"Constant offsets to texture gather not yet implemented");
++
++ if (operands & SpvImageOperandsSampleMask) {
++ assert(texop == nir_texop_txf);
++ texop = nir_texop_txf_ms;
++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
++ }
++ }
++ /* We should have now consumed exactly all of the arguments */
++ assert(idx == count);
++
++ nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
++ instr->op = texop;
++
++ memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
++
++ const struct glsl_type *image_type;
++ if (sampled.image) {
++ image_type = sampled.image->var->var->interface_type;
++ } else {
++ image_type = sampled.sampler->var->var->interface_type;
++ }
++
++ instr->sampler_dim = glsl_get_sampler_dim(image_type);
++ instr->is_array = glsl_sampler_type_is_array(image_type);
++ instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
++ instr->is_new_style_shadow = instr->is_shadow;
++
++ if (has_coord) {
++ switch (instr->sampler_dim) {
++ case GLSL_SAMPLER_DIM_1D:
++ case GLSL_SAMPLER_DIM_BUF:
++ instr->coord_components = 1;
++ break;
++ case GLSL_SAMPLER_DIM_2D:
++ case GLSL_SAMPLER_DIM_RECT:
++ instr->coord_components = 2;
++ break;
++ case GLSL_SAMPLER_DIM_3D:
++ case GLSL_SAMPLER_DIM_CUBE:
++ case GLSL_SAMPLER_DIM_MS:
++ instr->coord_components = 3;
++ break;
++ default:
++ unreachable("Invalid sampler type");
++ }
++
++ if (instr->is_array)
++ instr->coord_components++;
++ } else {
++ instr->coord_components = 0;
++ }
++
++ switch (glsl_get_sampler_result_type(image_type)) {
++ case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
++ case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break;
++ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break;
++ case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break;
++ default:
++ unreachable("Invalid base type for sampler result");
++ }
++
++ nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler);
++ instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
++ if (sampled.image) {
++ nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image);
++ instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref));
++ } else {
++ instr->texture = NULL;
++ }
++
++ nir_ssa_dest_init(&instr->instr, &instr->dest,
++ nir_tex_instr_dest_size(instr), NULL);
++
++ assert(glsl_get_vector_elements(ret_type->type) ==
++ nir_tex_instr_dest_size(instr));
++
++ val->ssa = vtn_create_ssa_value(b, ret_type->type);
++ val->ssa->def = &instr->dest.ssa;
++
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++}
++
++static nir_ssa_def *
++get_image_coord(struct vtn_builder *b, uint32_t value)
++{
++ struct vtn_ssa_value *coord = vtn_ssa_value(b, value);
++
++ /* The image_load_store intrinsics assume a 4-dim coordinate */
++ unsigned dim = glsl_get_vector_elements(coord->type);
++ unsigned swizzle[4];
++ for (unsigned i = 0; i < 4; i++)
++ swizzle[i] = MIN2(i, dim - 1);
++
++ return nir_swizzle(&b->nb, coord->def, swizzle, 4, false);
++}
++
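++/* Handles OpImageTexelPointer, image loads/stores/queries, and image
++ * atomics, lowering them to image_* intrinsics that operate on the image
++ * variable dereference.
++ */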
++static void
++vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ /* Just get this one out of the way */
++ if (opcode == SpvOpImageTexelPointer) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_image_pointer);
++ val->image = ralloc(b, struct vtn_image_pointer);
++
++ val->image->image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ val->image->coord = get_image_coord(b, w[4]);
++ val->image->sample = vtn_ssa_value(b, w[5])->def;
++ return;
++ }
++
++ struct vtn_image_pointer image;
++
++ switch (opcode) {
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicCompareExchange:
++ case SpvOpAtomicCompareExchangeWeak:
++ case SpvOpAtomicIIncrement:
++ case SpvOpAtomicIDecrement:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicISub:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor:
++ image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
++ break;
++
++ case SpvOpImageQuerySize:
++ image.image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ image.coord = NULL;
++ image.sample = NULL;
++ break;
++
++ case SpvOpImageRead:
++ image.image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ image.coord = get_image_coord(b, w[4]);
++
++ if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) {
++ assert(w[5] == SpvImageOperandsSampleMask);
++ image.sample = vtn_ssa_value(b, w[6])->def;
++ } else {
++ image.sample = nir_ssa_undef(&b->nb, 1);
++ }
++ break;
++
++ case SpvOpImageWrite:
++ image.image =
++ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
++ image.coord = get_image_coord(b, w[2]);
++
++ /* texel = w[3] */
++
++ if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) {
++ assert(w[4] == SpvImageOperandsSampleMask);
++ image.sample = vtn_ssa_value(b, w[5])->def;
++ } else {
++ image.sample = nir_ssa_undef(&b->nb, 1);
++ }
++ break;
++
++ default:
++ unreachable("Invalid image opcode");
++ }
++
++ nir_intrinsic_op op;
++ switch (opcode) {
++#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break;
++ OP(ImageQuerySize, size)
++ OP(ImageRead, load)
++ OP(ImageWrite, store)
++ OP(AtomicExchange, atomic_exchange)
++ OP(AtomicCompareExchange, atomic_comp_swap)
++ OP(AtomicIIncrement, atomic_add)
++ OP(AtomicIDecrement, atomic_add)
++ OP(AtomicIAdd, atomic_add)
++ OP(AtomicISub, atomic_add)
++ OP(AtomicSMin, atomic_min)
++ OP(AtomicUMin, atomic_min)
++ OP(AtomicSMax, atomic_max)
++ OP(AtomicUMax, atomic_max)
++ OP(AtomicAnd, atomic_and)
++ OP(AtomicOr, atomic_or)
++ OP(AtomicXor, atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid image opcode");
++ }
++
++ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
++
++ nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image);
++ intrin->variables[0] =
++ nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref));
++
++ /* ImageQuerySize doesn't take any extra parameters */
++ if (opcode != SpvOpImageQuerySize) {
++ /* The image coordinate is always 4 components but we may not have that
++ * many. Swizzle to compensate.
++ */
++ unsigned swiz[4];
++ for (unsigned i = 0; i < 4; i++)
++ swiz[i] = i < image.coord->num_components ? i : 0;
++ intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord,
++ swiz, 4, false));
++ intrin->src[1] = nir_src_for_ssa(image.sample);
++ }
++
++ switch (opcode) {
++ case SpvOpImageQuerySize:
++ case SpvOpImageRead:
++ break;
++ case SpvOpImageWrite:
++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
++ break;
++ case SpvOpAtomicIIncrement:
++ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
++ break;
++ case SpvOpAtomicIDecrement:
++ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
++ break;
++
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor:
++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
++ break;
++
++ case SpvOpAtomicCompareExchange:
++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
++ intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
++ break;
++
++ case SpvOpAtomicISub:
++ intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
++ break;
++
++ default:
++ unreachable("Invalid image opcode");
++ }
++
++ if (opcode != SpvOpImageWrite) {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
++
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++
++ /* The image intrinsics always return 4 channels but we may not want
++ * that many. Emit a mov to trim it down.
++ */
++ unsigned swiz[4] = {0, 1, 2, 3};
++ val->ssa = vtn_create_ssa_value(b, type->type);
++ val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz,
++ glsl_get_vector_elements(type->type), false);
++ } else {
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++ }
++}
++
++static nir_intrinsic_op
++get_ssbo_nir_atomic_op(SpvOp opcode)
++{
++ switch (opcode) {
++#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N;
++ OP(AtomicExchange, atomic_exchange)
++ OP(AtomicCompareExchange, atomic_comp_swap)
++ OP(AtomicIIncrement, atomic_add)
++ OP(AtomicIDecrement, atomic_add)
++ OP(AtomicIAdd, atomic_add)
++ OP(AtomicISub, atomic_add)
++ OP(AtomicSMin, atomic_imin)
++ OP(AtomicUMin, atomic_umin)
++ OP(AtomicSMax, atomic_imax)
++ OP(AtomicUMax, atomic_umax)
++ OP(AtomicAnd, atomic_and)
++ OP(AtomicOr, atomic_or)
++ OP(AtomicXor, atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid SSBO atomic");
++ }
++}
++
++static nir_intrinsic_op
++get_shared_nir_atomic_op(SpvOp opcode)
++{
++ switch (opcode) {
++#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N;
++ OP(AtomicExchange, atomic_exchange)
++ OP(AtomicCompareExchange, atomic_comp_swap)
++ OP(AtomicIIncrement, atomic_add)
++ OP(AtomicIDecrement, atomic_add)
++ OP(AtomicIAdd, atomic_add)
++ OP(AtomicISub, atomic_add)
++ OP(AtomicSMin, atomic_imin)
++ OP(AtomicUMin, atomic_umin)
++ OP(AtomicSMax, atomic_imax)
++ OP(AtomicUMax, atomic_umax)
++ OP(AtomicAnd, atomic_and)
++ OP(AtomicOr, atomic_or)
++ OP(AtomicXor, atomic_xor)
++#undef OP
++ default:
++ unreachable("Invalid shared atomic");
++ }
++}
++
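++/* Fills in the value operands shared by the SSBO and workgroup atomic
++ * lowering: increment/decrement become adds of +/-1 and ISub negates its
++ * operand so all three can reuse the atomic-add intrinsic.
++ */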
++static void
++fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, nir_src *src)
++{
++ switch (opcode) {
++ case SpvOpAtomicIIncrement:
++ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
++ break;
++
++ case SpvOpAtomicIDecrement:
++ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
++ break;
++
++ case SpvOpAtomicISub:
++ src[0] =
++ nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
++ break;
++
++ case SpvOpAtomicCompareExchange:
++ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
++ src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
++ break;
++
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor:
++ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
++ break;
++
++ default:
++ unreachable("Invalid SPIR-V atomic");
++ }
++}
++
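++/* Handles atomics on SSBO or workgroup storage.  Workgroup variables use the
++ * variable-based atomic intrinsics on a dereference; SSBO accesses are
++ * lowered to an (index, offset) pair for the SSBO atomic intrinsics.
++ */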
++static void
++vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_access_chain *chain =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ nir_intrinsic_instr *atomic;
++
++ /*
++ SpvScope scope = w[4];
++ SpvMemorySemanticsMask semantics = w[5];
++ */
++
++ if (chain->var->mode == vtn_variable_mode_workgroup) {
++ nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref;
++ nir_intrinsic_op op = get_shared_nir_atomic_op(opcode);
++ atomic = nir_intrinsic_instr_create(b->nb.shader, op);
++ atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref));
++ fill_common_atomic_sources(b, opcode, w, &atomic->src[0]);
++ } else {
++ assert(chain->var->mode == vtn_variable_mode_ssbo);
++ struct vtn_type *type;
++ nir_ssa_def *offset, *index;
++ offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false);
++
++ nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
++
++ atomic = nir_intrinsic_instr_create(b->nb.shader, op);
++ atomic->src[0] = nir_src_for_ssa(index);
++ atomic->src[1] = nir_src_for_ssa(offset);
++ fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
++ }
++
++ nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
++
++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = rzalloc(b, struct vtn_ssa_value);
++ val->ssa->def = &atomic->dest.ssa;
++ val->ssa->type = type->type;
++
++ nir_builder_instr_insert(&b->nb, &atomic->instr);
++}
++
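++/* Creates (but does not insert) a vecN ALU instruction, or an fmov for a
++ * single component, with an SSA destination of the requested width.
++ */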
++static nir_alu_instr *
++create_vec(nir_shader *shader, unsigned num_components)
++{
++ nir_op op;
++ switch (num_components) {
++ case 1: op = nir_op_fmov; break;
++ case 2: op = nir_op_vec2; break;
++ case 3: op = nir_op_vec3; break;
++ case 4: op = nir_op_vec4; break;
++ default: unreachable("bad vector size");
++ }
++
++ nir_alu_instr *vec = nir_alu_instr_create(shader, op);
++ nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
++ vec->dest.write_mask = (1 << num_components) - 1;
++
++ return vec;
++}
++
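++/* Transposes a matrix value by emitting one vecN per destination column,
++ * gathering component i from each source column.  The result keeps a link
++ * back to the original so transposing it again is free.
++ */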
++struct vtn_ssa_value *
++vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++ if (src->transposed)
++ return src->transposed;
++
++ struct vtn_ssa_value *dest =
++ vtn_create_ssa_value(b, glsl_transposed_type(src->type));
++
++ for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
++ nir_alu_instr *vec = create_vec(b->shader,
++ glsl_get_matrix_columns(src->type));
++ if (glsl_type_is_vector_or_scalar(src->type)) {
++ vec->src[0].src = nir_src_for_ssa(src->def);
++ vec->src[0].swizzle[0] = i;
++ } else {
++ for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
++ vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
++ vec->src[j].swizzle[0] = i;
++ }
++ }
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++ dest->elems[i]->def = &vec->dest.dest.ssa;
++ }
++
++ dest->transposed = src;
++
++ return dest;
++}
++
++nir_ssa_def *
++vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index)
++{
++ unsigned swiz[4] = { index };
++ return nir_swizzle(&b->nb, src, swiz, 1, true);
++}
++
++nir_ssa_def *
++vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
++ unsigned index)
++{
++ nir_alu_instr *vec = create_vec(b->shader, src->num_components);
++
++ for (unsigned i = 0; i < src->num_components; i++) {
++ if (i == index) {
++ vec->src[i].src = nir_src_for_ssa(insert);
++ } else {
++ vec->src[i].src = nir_src_for_ssa(src);
++ vec->src[i].swizzle[0] = i;
++ }
++ }
++
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++
++ return &vec->dest.dest.ssa;
++}
++
++nir_ssa_def *
++vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *index)
++{
++ nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
++ for (unsigned i = 1; i < src->num_components; i++)
++ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
++ vtn_vector_extract(b, src, i), dest);
++
++ return dest;
++}
++
++nir_ssa_def *
++vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *insert, nir_ssa_def *index)
++{
++ nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0);
++ for (unsigned i = 1; i < src->num_components; i++)
++ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
++ vtn_vector_insert(b, src, insert, i), dest);
++
++ return dest;
++}
++
++static nir_ssa_def *
++vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
++ nir_ssa_def *src0, nir_ssa_def *src1,
++ const uint32_t *indices)
++{
++ nir_alu_instr *vec = create_vec(b->shader, num_components);
++
++ nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
++ nir_builder_instr_insert(&b->nb, &undef->instr);
++
++ for (unsigned i = 0; i < num_components; i++) {
++ uint32_t index = indices[i];
++ if (index == 0xffffffff) {
++ vec->src[i].src = nir_src_for_ssa(&undef->def);
++ } else if (index < src0->num_components) {
++ vec->src[i].src = nir_src_for_ssa(src0);
++ vec->src[i].swizzle[0] = index;
++ } else {
++ vec->src[i].src = nir_src_for_ssa(src1);
++ vec->src[i].swizzle[0] = index - src0->num_components;
++ }
++ }
++
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++
++ return &vec->dest.dest.ssa;
++}
++
++/*
++ * Concatenates a number of vectors/scalars together to produce a vector
++ */
++static nir_ssa_def *
++vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
++ unsigned num_srcs, nir_ssa_def **srcs)
++{
++ nir_alu_instr *vec = create_vec(b->shader, num_components);
++
++ unsigned dest_idx = 0;
++ for (unsigned i = 0; i < num_srcs; i++) {
++ nir_ssa_def *src = srcs[i];
++ for (unsigned j = 0; j < src->num_components; j++) {
++ vec->src[dest_idx].src = nir_src_for_ssa(src);
++ vec->src[dest_idx].swizzle[0] = j;
++ dest_idx++;
++ }
++ }
++
++ nir_builder_instr_insert(&b->nb, &vec->instr);
++
++ return &vec->dest.dest.ssa;
++}
++
++static struct vtn_ssa_value *
++vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
++{
++ struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
++ dest->type = src->type;
++
++ if (glsl_type_is_vector_or_scalar(src->type)) {
++ dest->def = src->def;
++ } else {
++ unsigned elems = glsl_get_length(src->type);
++
++ dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++)
++ dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
++ }
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_ssa_value *insert, const uint32_t *indices,
++ unsigned num_indices)
++{
++ struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
++
++ struct vtn_ssa_value *cur = dest;
++ unsigned i;
++ for (i = 0; i < num_indices - 1; i++) {
++ cur = cur->elems[indices[i]];
++ }
++
++ if (glsl_type_is_vector_or_scalar(cur->type)) {
++ /* According to the SPIR-V spec, OpCompositeInsert may work down to
++ * component granularity. In that case, the last index selects the
++ * vector component at which the scalar is inserted.
++ */
++
++ cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]);
++ } else {
++ cur->elems[indices[i]] = insert;
++ }
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
++ const uint32_t *indices, unsigned num_indices)
++{
++ struct vtn_ssa_value *cur = src;
++ for (unsigned i = 0; i < num_indices; i++) {
++ if (glsl_type_is_vector_or_scalar(cur->type)) {
++ assert(i == num_indices - 1);
++ /* According to the SPIR-V spec, OpCompositeExtract may work down to
++ * component granularity. In that case, the last index selects the
++ * component to extract from the vector.
++ */
++
++ struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value);
++ ret->type = glsl_scalar_type(glsl_get_base_type(cur->type));
++ ret->def = vtn_vector_extract(b, cur->def, indices[i]);
++ return ret;
++ } else {
++ cur = cur->elems[indices[i]];
++ }
++ }
++
++ return cur;
++}
++
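++/* Handles the vector and composite construct/extract/insert/shuffle opcodes
++ * directly on SSA values, without going through memory.
++ */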
++static void
++vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ const struct glsl_type *type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ val->ssa = vtn_create_ssa_value(b, type);
++
++ switch (opcode) {
++ case SpvOpVectorExtractDynamic:
++ val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
++ vtn_ssa_value(b, w[4])->def);
++ break;
++
++ case SpvOpVectorInsertDynamic:
++ val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
++ vtn_ssa_value(b, w[4])->def,
++ vtn_ssa_value(b, w[5])->def);
++ break;
++
++ case SpvOpVectorShuffle:
++ val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type),
++ vtn_ssa_value(b, w[3])->def,
++ vtn_ssa_value(b, w[4])->def,
++ w + 5);
++ break;
++
++ case SpvOpCompositeConstruct: {
++ unsigned elems = count - 3;
++ if (glsl_type_is_vector_or_scalar(type)) {
++ nir_ssa_def *srcs[4];
++ for (unsigned i = 0; i < elems; i++)
++ srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
++ val->ssa->def =
++ vtn_vector_construct(b, glsl_get_vector_elements(type),
++ elems, srcs);
++ } else {
++ val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
++ for (unsigned i = 0; i < elems; i++)
++ val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
++ }
++ break;
++ }
++ case SpvOpCompositeExtract:
++ val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
++ w + 4, count - 4);
++ break;
++
++ case SpvOpCompositeInsert:
++ val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
++ vtn_ssa_value(b, w[3]),
++ w + 5, count - 5);
++ break;
++
++ case SpvOpCopyObject:
++ val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
++ break;
++
++ default:
++ unreachable("unknown composite operation");
++ }
++}
++
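++/* Maps the vertex-stream and barrier opcodes onto the corresponding NIR
++ * intrinsics; for the stream variants, the stream ID is carried in
++ * const_index[0].
++ */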
++static void
++vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ nir_intrinsic_op intrinsic_op;
++ switch (opcode) {
++ case SpvOpEmitVertex:
++ case SpvOpEmitStreamVertex:
++ intrinsic_op = nir_intrinsic_emit_vertex;
++ break;
++ case SpvOpEndPrimitive:
++ case SpvOpEndStreamPrimitive:
++ intrinsic_op = nir_intrinsic_end_primitive;
++ break;
++ case SpvOpMemoryBarrier:
++ intrinsic_op = nir_intrinsic_memory_barrier;
++ break;
++ case SpvOpControlBarrier:
++ intrinsic_op = nir_intrinsic_barrier;
++ break;
++ default:
++ unreachable("unknown barrier instruction");
++ }
++
++ nir_intrinsic_instr *intrin =
++ nir_intrinsic_instr_create(b->shader, intrinsic_op);
++
++ if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
++ intrin->const_index[0] = w[1];
++
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++}
++
++static unsigned
++gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
++{
++ switch (mode) {
++ case SpvExecutionModeInputPoints:
++ case SpvExecutionModeOutputPoints:
++ return 0; /* GL_POINTS */
++ case SpvExecutionModeInputLines:
++ return 1; /* GL_LINES */
++ case SpvExecutionModeInputLinesAdjacency:
++ return 0x000A; /* GL_LINES_ADJACENCY_ARB */
++ case SpvExecutionModeTriangles:
++ return 4; /* GL_TRIANGLES */
++ case SpvExecutionModeInputTrianglesAdjacency:
++ return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */
++ case SpvExecutionModeQuads:
++ return 7; /* GL_QUADS */
++ case SpvExecutionModeIsolines:
++ return 0x8E7A; /* GL_ISOLINES */
++ case SpvExecutionModeOutputLineStrip:
++ return 3; /* GL_LINE_STRIP */
++ case SpvExecutionModeOutputTriangleStrip:
++ return 5; /* GL_TRIANGLE_STRIP */
++ default:
++ assert(!"Invalid primitive type");
++ return 4;
++ }
++}
++
++static unsigned
++vertices_in_from_spv_execution_mode(SpvExecutionMode mode)
++{
++ switch (mode) {
++ case SpvExecutionModeInputPoints:
++ return 1;
++ case SpvExecutionModeInputLines:
++ return 2;
++ case SpvExecutionModeInputLinesAdjacency:
++ return 4;
++ case SpvExecutionModeTriangles:
++ return 3;
++ case SpvExecutionModeInputTrianglesAdjacency:
++ return 6;
++ default:
++ assert(!"Invalid GS input mode");
++ return 0;
++ }
++}
++
++static gl_shader_stage
++stage_for_execution_model(SpvExecutionModel model)
++{
++ switch (model) {
++ case SpvExecutionModelVertex:
++ return MESA_SHADER_VERTEX;
++ case SpvExecutionModelTessellationControl:
++ return MESA_SHADER_TESS_CTRL;
++ case SpvExecutionModelTessellationEvaluation:
++ return MESA_SHADER_TESS_EVAL;
++ case SpvExecutionModelGeometry:
++ return MESA_SHADER_GEOMETRY;
++ case SpvExecutionModelFragment:
++ return MESA_SHADER_FRAGMENT;
++ case SpvExecutionModelGLCompute:
++ return MESA_SHADER_COMPUTE;
++ default:
++ unreachable("Unsupported execution model");
++ }
++}
++
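++/* Handles the debug, capability, entry-point, and decoration instructions
++ * that make up the module preamble; returns false on the first opcode
++ * outside that set.
++ */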
++static bool
++vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpSource:
++ case SpvOpSourceExtension:
++ case SpvOpSourceContinued:
++ case SpvOpExtension:
++ /* Unhandled, but these are for debug so that's ok. */
++ break;
++
++ case SpvOpCapability:
++ switch ((SpvCapability)w[1]) {
++ case SpvCapabilityMatrix:
++ case SpvCapabilityShader:
++ case SpvCapabilityGeometry:
++ break;
++ default:
++ assert(!"Unsupported capability");
++ }
++ break;
++
++ case SpvOpExtInstImport:
++ vtn_handle_extension(b, opcode, w, count);
++ break;
++
++ case SpvOpMemoryModel:
++ assert(w[1] == SpvAddressingModelLogical);
++ assert(w[2] == SpvMemoryModelGLSL450);
++ break;
++
++ case SpvOpEntryPoint: {
++ struct vtn_value *entry_point = &b->values[w[2]];
++ /* Let this be a name label regardless */
++ unsigned name_words;
++ entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
++
++ if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
++ stage_for_execution_model(w[1]) != b->entry_point_stage)
++ break;
++
++ assert(b->entry_point == NULL);
++ b->entry_point = entry_point;
++ break;
++ }
++
++ case SpvOpString:
++ vtn_push_value(b, w[1], vtn_value_type_string)->str =
++ vtn_string_literal(b, &w[2], count - 2, NULL);
++ break;
++
++ case SpvOpName:
++ b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
++ break;
++
++ case SpvOpMemberName:
++ /* TODO */
++ break;
++
++ case SpvOpExecutionMode:
++ case SpvOpDecorationGroup:
++ case SpvOpDecorate:
++ case SpvOpMemberDecorate:
++ case SpvOpGroupDecorate:
++ case SpvOpGroupMemberDecorate:
++ vtn_handle_decoration(b, opcode, w, count);
++ break;
++
++ default:
++ return false; /* End of preamble */
++ }
++
++ return true;
++}
++
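++/* Applies the entry point's execution modes to the NIR shader_info:
++ * fragment origin and depth layout, geometry primitive types and counts,
++ * and the compute local size.
++ */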
++static void
++vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
++ const struct vtn_decoration *mode, void *data)
++{
++ assert(b->entry_point == entry_point);
++
++ switch(mode->exec_mode) {
++ case SpvExecutionModeOriginUpperLeft:
++ case SpvExecutionModeOriginLowerLeft:
++ b->origin_upper_left =
++ (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
++ break;
++
++ case SpvExecutionModeEarlyFragmentTests:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.early_fragment_tests = true;
++ break;
++
++ case SpvExecutionModeInvocations:
++ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
++ b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
++ break;
++
++ case SpvExecutionModeDepthReplacing:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
++ break;
++ case SpvExecutionModeDepthGreater:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
++ break;
++ case SpvExecutionModeDepthLess:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
++ break;
++ case SpvExecutionModeDepthUnchanged:
++ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
++ break;
++
++ case SpvExecutionModeLocalSize:
++ assert(b->shader->stage == MESA_SHADER_COMPUTE);
++ b->shader->info.cs.local_size[0] = mode->literals[0];
++ b->shader->info.cs.local_size[1] = mode->literals[1];
++ b->shader->info.cs.local_size[2] = mode->literals[2];
++ break;
++ case SpvExecutionModeLocalSizeHint:
++ break; /* Nothing to do with this */
++
++ case SpvExecutionModeOutputVertices:
++ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
++ b->shader->info.gs.vertices_out = mode->literals[0];
++ break;
++
++ case SpvExecutionModeInputPoints:
++ case SpvExecutionModeInputLines:
++ case SpvExecutionModeInputLinesAdjacency:
++ case SpvExecutionModeTriangles:
++ case SpvExecutionModeInputTrianglesAdjacency:
++ case SpvExecutionModeQuads:
++ case SpvExecutionModeIsolines:
++ if (b->shader->stage == MESA_SHADER_GEOMETRY) {
++ b->shader->info.gs.vertices_in =
++ vertices_in_from_spv_execution_mode(mode->exec_mode);
++ } else {
++ assert(!"Tessellation shaders not yet supported");
++ }
++ break;
++
++ case SpvExecutionModeOutputPoints:
++ case SpvExecutionModeOutputLineStrip:
++ case SpvExecutionModeOutputTriangleStrip:
++ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
++ b->shader->info.gs.output_primitive =
++ gl_primitive_from_spv_execution_mode(mode->exec_mode);
++ break;
++
++ case SpvExecutionModeSpacingEqual:
++ case SpvExecutionModeSpacingFractionalEven:
++ case SpvExecutionModeSpacingFractionalOdd:
++ case SpvExecutionModeVertexOrderCw:
++ case SpvExecutionModeVertexOrderCcw:
++ case SpvExecutionModePointMode:
++ assert(!"TODO: Add tessellation metadata");
++ break;
++
++ case SpvExecutionModePixelCenterInteger:
++ case SpvExecutionModeXfb:
++ assert(!"Unhandled execution mode");
++ break;
++
++ case SpvExecutionModeVecTypeHint:
++ case SpvExecutionModeContractionOff:
++ break; /* OpenCL */
++ }
++}
++
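++/* Handles the types, constants, and global variables that appear after the
++ * preamble and before the function bodies; returns false on the first opcode
++ * outside that set.
++ */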
++static bool
++vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpSource:
++ case SpvOpSourceContinued:
++ case SpvOpSourceExtension:
++ case SpvOpExtension:
++ case SpvOpCapability:
++ case SpvOpExtInstImport:
++ case SpvOpMemoryModel:
++ case SpvOpEntryPoint:
++ case SpvOpExecutionMode:
++ case SpvOpString:
++ case SpvOpName:
++ case SpvOpMemberName:
++ case SpvOpDecorationGroup:
++ case SpvOpDecorate:
++ case SpvOpMemberDecorate:
++ case SpvOpGroupDecorate:
++ case SpvOpGroupMemberDecorate:
++ assert(!"Invalid opcode in types and variables section");
++ break;
++
++ case SpvOpTypeVoid:
++ case SpvOpTypeBool:
++ case SpvOpTypeInt:
++ case SpvOpTypeFloat:
++ case SpvOpTypeVector:
++ case SpvOpTypeMatrix:
++ case SpvOpTypeImage:
++ case SpvOpTypeSampler:
++ case SpvOpTypeSampledImage:
++ case SpvOpTypeArray:
++ case SpvOpTypeRuntimeArray:
++ case SpvOpTypeStruct:
++ case SpvOpTypeOpaque:
++ case SpvOpTypePointer:
++ case SpvOpTypeFunction:
++ case SpvOpTypeEvent:
++ case SpvOpTypeDeviceEvent:
++ case SpvOpTypeReserveId:
++ case SpvOpTypeQueue:
++ case SpvOpTypePipe:
++ vtn_handle_type(b, opcode, w, count);
++ break;
++
++ case SpvOpConstantTrue:
++ case SpvOpConstantFalse:
++ case SpvOpConstant:
++ case SpvOpConstantComposite:
++ case SpvOpConstantSampler:
++ case SpvOpConstantNull:
++ case SpvOpSpecConstantTrue:
++ case SpvOpSpecConstantFalse:
++ case SpvOpSpecConstant:
++ case SpvOpSpecConstantComposite:
++ case SpvOpSpecConstantOp:
++ vtn_handle_constant(b, opcode, w, count);
++ break;
++
++ case SpvOpVariable:
++ vtn_handle_variables(b, opcode, w, count);
++ break;
++
++ default:
++ return false; /* End of preamble */
++ }
++
++ return true;
++}
++
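++/* Per-instruction handler for function bodies: dispatches each opcode to the
++ * variable, function-call, texture, image, atomic, composite, or ALU
++ * handlers.
++ */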
++static bool
++vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpLabel:
++ break;
++
++ case SpvOpLoopMerge:
++ case SpvOpSelectionMerge:
++ /* This is handled by cfg pre-pass and walk_blocks */
++ break;
++
++ case SpvOpUndef: {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
++ val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ break;
++ }
++
++ case SpvOpExtInst:
++ vtn_handle_extension(b, opcode, w, count);
++ break;
++
++ case SpvOpVariable:
++ case SpvOpLoad:
++ case SpvOpStore:
++ case SpvOpCopyMemory:
++ case SpvOpCopyMemorySized:
++ case SpvOpAccessChain:
++ case SpvOpInBoundsAccessChain:
++ case SpvOpArrayLength:
++ vtn_handle_variables(b, opcode, w, count);
++ break;
++
++ case SpvOpFunctionCall:
++ vtn_handle_function_call(b, opcode, w, count);
++ break;
++
++ case SpvOpSampledImage:
++ case SpvOpImage:
++ case SpvOpImageSampleImplicitLod:
++ case SpvOpImageSampleExplicitLod:
++ case SpvOpImageSampleDrefImplicitLod:
++ case SpvOpImageSampleDrefExplicitLod:
++ case SpvOpImageSampleProjImplicitLod:
++ case SpvOpImageSampleProjExplicitLod:
++ case SpvOpImageSampleProjDrefImplicitLod:
++ case SpvOpImageSampleProjDrefExplicitLod:
++ case SpvOpImageFetch:
++ case SpvOpImageGather:
++ case SpvOpImageDrefGather:
++ case SpvOpImageQuerySizeLod:
++ case SpvOpImageQueryLod:
++ case SpvOpImageQueryLevels:
++ case SpvOpImageQuerySamples:
++ vtn_handle_texture(b, opcode, w, count);
++ break;
++
++ case SpvOpImageRead:
++ case SpvOpImageWrite:
++ case SpvOpImageTexelPointer:
++ vtn_handle_image(b, opcode, w, count);
++ break;
++
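++ /* OpImageQuerySize applies to both storage images and sampled textures;
++ * dispatch on the underlying variable's type to pick the right handler.
++ */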
++ case SpvOpImageQuerySize: {
++ struct vtn_access_chain *image =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++ if (glsl_type_is_image(image->var->var->interface_type)) {
++ vtn_handle_image(b, opcode, w, count);
++ } else {
++ vtn_handle_texture(b, opcode, w, count);
++ }
++ break;
++ }
++
++ case SpvOpAtomicExchange:
++ case SpvOpAtomicCompareExchange:
++ case SpvOpAtomicCompareExchangeWeak:
++ case SpvOpAtomicIIncrement:
++ case SpvOpAtomicIDecrement:
++ case SpvOpAtomicIAdd:
++ case SpvOpAtomicISub:
++ case SpvOpAtomicSMin:
++ case SpvOpAtomicUMin:
++ case SpvOpAtomicSMax:
++ case SpvOpAtomicUMax:
++ case SpvOpAtomicAnd:
++ case SpvOpAtomicOr:
++ case SpvOpAtomicXor: {
++ struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
++ if (pointer->value_type == vtn_value_type_image_pointer) {
++ vtn_handle_image(b, opcode, w, count);
++ } else {
++ assert(pointer->value_type == vtn_value_type_access_chain);
++ vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count);
++ }
++ break;
++ }
++
++ case SpvOpSNegate:
++ case SpvOpFNegate:
++ case SpvOpNot:
++ case SpvOpAny:
++ case SpvOpAll:
++ case SpvOpConvertFToU:
++ case SpvOpConvertFToS:
++ case SpvOpConvertSToF:
++ case SpvOpConvertUToF:
++ case SpvOpUConvert:
++ case SpvOpSConvert:
++ case SpvOpFConvert:
++ case SpvOpQuantizeToF16:
++ case SpvOpConvertPtrToU:
++ case SpvOpConvertUToPtr:
++ case SpvOpPtrCastToGeneric:
++ case SpvOpGenericCastToPtr:
++ case SpvOpBitcast:
++ case SpvOpIsNan:
++ case SpvOpIsInf:
++ case SpvOpIsFinite:
++ case SpvOpIsNormal:
++ case SpvOpSignBitSet:
++ case SpvOpLessOrGreater:
++ case SpvOpOrdered:
++ case SpvOpUnordered:
++ case SpvOpIAdd:
++ case SpvOpFAdd:
++ case SpvOpISub:
++ case SpvOpFSub:
++ case SpvOpIMul:
++ case SpvOpFMul:
++ case SpvOpUDiv:
++ case SpvOpSDiv:
++ case SpvOpFDiv:
++ case SpvOpUMod:
++ case SpvOpSRem:
++ case SpvOpSMod:
++ case SpvOpFRem:
++ case SpvOpFMod:
++ case SpvOpVectorTimesScalar:
++ case SpvOpDot:
++ case SpvOpIAddCarry:
++ case SpvOpISubBorrow:
++ case SpvOpUMulExtended:
++ case SpvOpSMulExtended:
++ case SpvOpShiftRightLogical:
++ case SpvOpShiftRightArithmetic:
++ case SpvOpShiftLeftLogical:
++ case SpvOpLogicalEqual:
++ case SpvOpLogicalNotEqual:
++ case SpvOpLogicalOr:
++ case SpvOpLogicalAnd:
++ case SpvOpLogicalNot:
++ case SpvOpBitwiseOr:
++ case SpvOpBitwiseXor:
++ case SpvOpBitwiseAnd:
++ case SpvOpSelect:
++ case SpvOpIEqual:
++ case SpvOpFOrdEqual:
++ case SpvOpFUnordEqual:
++ case SpvOpINotEqual:
++ case SpvOpFOrdNotEqual:
++ case SpvOpFUnordNotEqual:
++ case SpvOpULessThan:
++ case SpvOpSLessThan:
++ case SpvOpFOrdLessThan:
++ case SpvOpFUnordLessThan:
++ case SpvOpUGreaterThan:
++ case SpvOpSGreaterThan:
++ case SpvOpFOrdGreaterThan:
++ case SpvOpFUnordGreaterThan:
++ case SpvOpULessThanEqual:
++ case SpvOpSLessThanEqual:
++ case SpvOpFOrdLessThanEqual:
++ case SpvOpFUnordLessThanEqual:
++ case SpvOpUGreaterThanEqual:
++ case SpvOpSGreaterThanEqual:
++ case SpvOpFOrdGreaterThanEqual:
++ case SpvOpFUnordGreaterThanEqual:
++ case SpvOpDPdx:
++ case SpvOpDPdy:
++ case SpvOpFwidth:
++ case SpvOpDPdxFine:
++ case SpvOpDPdyFine:
++ case SpvOpFwidthFine:
++ case SpvOpDPdxCoarse:
++ case SpvOpDPdyCoarse:
++ case SpvOpFwidthCoarse:
++ case SpvOpBitFieldInsert:
++ case SpvOpBitFieldSExtract:
++ case SpvOpBitFieldUExtract:
++ case SpvOpBitReverse:
++ case SpvOpBitCount:
++ case SpvOpTranspose:
++ case SpvOpOuterProduct:
++ case SpvOpMatrixTimesScalar:
++ case SpvOpVectorTimesMatrix:
++ case SpvOpMatrixTimesVector:
++ case SpvOpMatrixTimesMatrix:
++ vtn_handle_alu(b, opcode, w, count);
++ break;
++
++ case SpvOpVectorExtractDynamic:
++ case SpvOpVectorInsertDynamic:
++ case SpvOpVectorShuffle:
++ case SpvOpCompositeConstruct:
++ case SpvOpCompositeExtract:
++ case SpvOpCompositeInsert:
++ case SpvOpCopyObject:
++ vtn_handle_composite(b, opcode, w, count);
++ break;
++
++ case SpvOpEmitVertex:
++ case SpvOpEndPrimitive:
++ case SpvOpEmitStreamVertex:
++ case SpvOpEndStreamPrimitive:
++ case SpvOpControlBarrier:
++ case SpvOpMemoryBarrier:
++ vtn_handle_barrier(b, opcode, w, count);
++ break;
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ return true;
++}
++
++nir_function *
++spirv_to_nir(const uint32_t *words, size_t word_count,
++ struct nir_spirv_specialization *spec, unsigned num_spec,
++ gl_shader_stage stage, const char *entry_point_name,
++ const nir_shader_compiler_options *options)
++{
++ const uint32_t *word_end = words + word_count;
++
++ /* Handle the SPIR-V header (first 5 dwords) */
++ assert(word_count > 5);
++
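++ /* Per the SPIR-V spec, the five header words are the magic number, the
++ * version, the generator's magic number, the ID bound, and a reserved
++ * schema word that must be zero.
++ */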
++ assert(words[0] == SpvMagicNumber);
++ assert(words[1] >= 0x10000);
++ /* words[2] == generator magic */
++ unsigned value_id_bound = words[3];
++ assert(words[4] == 0);
++
++ words += 5;
++
++ /* Initialize the vtn_builder object */
++ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
++ b->value_id_bound = value_id_bound;
++ b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
++ exec_list_make_empty(&b->functions);
++ b->entry_point_stage = stage;
++ b->entry_point_name = entry_point_name;
++
++ /* Handle all the preamble instructions */
++ words = vtn_foreach_instruction(b, words, word_end,
++ vtn_handle_preamble_instruction);
++
++ if (b->entry_point == NULL) {
++ assert(!"Entry point not found");
++ ralloc_free(b);
++ return NULL;
++ }
++
++ b->shader = nir_shader_create(NULL, stage, options);
++
++ /* Parse execution modes */
++ vtn_foreach_execution_mode(b, b->entry_point,
++ vtn_handle_execution_mode, NULL);
++
++ b->specializations = spec;
++ b->num_specializations = num_spec;
++
++ /* Handle all variable, type, and constant instructions */
++ words = vtn_foreach_instruction(b, words, word_end,
++ vtn_handle_variable_or_type_instruction);
++
++ vtn_build_cfg(b, words, word_end);
++
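++ /* vtn_build_cfg() creates the nir_functions and the structured
++ * control-flow lists; the loop below then emits the actual NIR
++ * instructions for each function body.
++ */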
++ foreach_list_typed(struct vtn_function, func, node, &b->functions) {
++ b->impl = func->impl;
++ b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
++
++ vtn_function_emit(b, func, vtn_handle_body_instruction);
++ }
++
++ assert(b->entry_point->value_type == vtn_value_type_function);
++ nir_function *entry_point = b->entry_point->func->impl->function;
++ assert(entry_point);
++
++ ralloc_free(b);
++
++ return entry_point;
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "vtn_private.h"
++
++/*
++ * Normally, column vectors in SPIR-V correspond to a single NIR SSA
++ * definition. But for matrix multiplies, we want to do one routine for
++ * multiplying a matrix by a matrix and then pretend that vectors are matrices
++ * with one column. So we "wrap" these things, and unwrap the result before we
++ * send it off.
++ */
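++/* For example, OpMatrixTimesVector wraps the vector operand as a one-column
++ * matrix, runs the general multiply path below, and then unwraps the
++ * single-column result back into an ordinary vector.
++ */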
++
++static struct vtn_ssa_value *
++wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
++{
++ if (val == NULL)
++ return NULL;
++
++ if (glsl_type_is_matrix(val->type))
++ return val;
++
++ struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
++ dest->type = val->type;
++ dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
++ dest->elems[0] = val;
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++unwrap_matrix(struct vtn_ssa_value *val)
++{
++ if (glsl_type_is_matrix(val->type))
++ return val;
++
++ return val->elems[0];
++}
++
++static struct vtn_ssa_value *
++matrix_multiply(struct vtn_builder *b,
++ struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
++{
++
++ struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
++ struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
++ struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
++ struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);
++
++ unsigned src0_rows = glsl_get_vector_elements(src0->type);
++ unsigned src0_columns = glsl_get_matrix_columns(src0->type);
++ unsigned src1_columns = glsl_get_matrix_columns(src1->type);
++
++ const struct glsl_type *dest_type;
++ if (src1_columns > 1) {
++ dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
++ src0_rows, src1_columns);
++ } else {
++ dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
++ }
++ struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);
++
++ dest = wrap_matrix(b, dest);
++
++ bool transpose_result = false;
++ if (src0_transpose && src1_transpose) {
++ /* transpose(A) * transpose(B) = transpose(B * A) */
++ src1 = src0_transpose;
++ src0 = src1_transpose;
++ src0_transpose = NULL;
++ src1_transpose = NULL;
++ transpose_result = true;
++ }
++
++ if (src0_transpose && !src1_transpose &&
++ glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
++ /* We already have the rows of src0 and the columns of src1 available,
++ * so we can just take the dot product of each row with each column to
++ * get the result.
++ */
++
++ for (unsigned i = 0; i < src1_columns; i++) {
++ nir_ssa_def *vec_src[4];
++ for (unsigned j = 0; j < src0_rows; j++) {
++ vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
++ src1->elems[i]->def);
++ }
++ dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
++ }
++ } else {
++ /* We don't handle the case where src1 is transposed but not src0, since
++ * the general case only uses individual components of src1 so the
++ * optimizer should chew through the transpose we emitted for src1.
++ */
++
++ for (unsigned i = 0; i < src1_columns; i++) {
++ /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
++ dest->elems[i]->def =
++ nir_fmul(&b->nb, src0->elems[0]->def,
++ nir_channel(&b->nb, src1->elems[i]->def, 0));
++ for (unsigned j = 1; j < src0_columns; j++) {
++ dest->elems[i]->def =
++ nir_fadd(&b->nb, dest->elems[i]->def,
++ nir_fmul(&b->nb, src0->elems[j]->def,
++ nir_channel(&b->nb, src1->elems[i]->def, j)));
++ }
++ }
++ }
++
++ dest = unwrap_matrix(dest);
++
++ if (transpose_result)
++ dest = vtn_ssa_transpose(b, dest);
++
++ return dest;
++}
++
++static struct vtn_ssa_value *
++mat_times_scalar(struct vtn_builder *b,
++ struct vtn_ssa_value *mat,
++ nir_ssa_def *scalar)
++{
++ struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
++ for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
++ if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT)
++ dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
++ else
++ dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
++ }
++
++ return dest;
++}
++
++static void
++vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
++ struct vtn_value *dest,
++ struct vtn_ssa_value *src0, struct vtn_ssa_value *src1)
++{
++ switch (opcode) {
++ case SpvOpFNegate: {
++ dest->ssa = vtn_create_ssa_value(b, src0->type);
++ unsigned cols = glsl_get_matrix_columns(src0->type);
++ for (unsigned i = 0; i < cols; i++)
++ dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def);
++ break;
++ }
++
++ case SpvOpFAdd: {
++ dest->ssa = vtn_create_ssa_value(b, src0->type);
++ unsigned cols = glsl_get_matrix_columns(src0->type);
++ for (unsigned i = 0; i < cols; i++)
++ dest->ssa->elems[i]->def =
++ nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
++ break;
++ }
++
++ case SpvOpFSub: {
++ dest->ssa = vtn_create_ssa_value(b, src0->type);
++ unsigned cols = glsl_get_matrix_columns(src0->type);
++ for (unsigned i = 0; i < cols; i++)
++ dest->ssa->elems[i]->def =
++ nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
++ break;
++ }
++
++ case SpvOpTranspose:
++ dest->ssa = vtn_ssa_transpose(b, src0);
++ break;
++
++ case SpvOpMatrixTimesScalar:
++ if (src0->transposed) {
++ dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed,
++ src1->def));
++ } else {
++ dest->ssa = mat_times_scalar(b, src0, src1->def);
++ }
++ break;
++
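++ /* OpVectorTimesMatrix computes v * M, which equals transpose(M) * v, so
++ * it is handled by passing the transposed matrix as the left operand to
++ * matrix_multiply().
++ */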
++ case SpvOpVectorTimesMatrix:
++ case SpvOpMatrixTimesVector:
++ case SpvOpMatrixTimesMatrix:
++ if (opcode == SpvOpVectorTimesMatrix) {
++ dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0);
++ } else {
++ dest->ssa = matrix_multiply(b, src0, src1);
++ }
++ break;
++
++ default: unreachable("unknown matrix opcode");
++ }
++}
++
++nir_op
++vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap)
++{
++ /* Indicates that the first two arguments should be swapped. This is
++ * used for implementing greater-than and less-than-or-equal.
++ */
++ *swap = false;
++
++ switch (opcode) {
++ case SpvOpSNegate: return nir_op_ineg;
++ case SpvOpFNegate: return nir_op_fneg;
++ case SpvOpNot: return nir_op_inot;
++ case SpvOpIAdd: return nir_op_iadd;
++ case SpvOpFAdd: return nir_op_fadd;
++ case SpvOpISub: return nir_op_isub;
++ case SpvOpFSub: return nir_op_fsub;
++ case SpvOpIMul: return nir_op_imul;
++ case SpvOpFMul: return nir_op_fmul;
++ case SpvOpUDiv: return nir_op_udiv;
++ case SpvOpSDiv: return nir_op_idiv;
++ case SpvOpFDiv: return nir_op_fdiv;
++ case SpvOpUMod: return nir_op_umod;
++ case SpvOpSMod: return nir_op_imod;
++ case SpvOpFMod: return nir_op_fmod;
++ case SpvOpSRem: return nir_op_irem;
++ case SpvOpFRem: return nir_op_frem;
++
++ case SpvOpShiftRightLogical: return nir_op_ushr;
++ case SpvOpShiftRightArithmetic: return nir_op_ishr;
++ case SpvOpShiftLeftLogical: return nir_op_ishl;
++ case SpvOpLogicalOr: return nir_op_ior;
++ case SpvOpLogicalEqual: return nir_op_ieq;
++ case SpvOpLogicalNotEqual: return nir_op_ine;
++ case SpvOpLogicalAnd: return nir_op_iand;
++ case SpvOpLogicalNot: return nir_op_inot;
++ case SpvOpBitwiseOr: return nir_op_ior;
++ case SpvOpBitwiseXor: return nir_op_ixor;
++ case SpvOpBitwiseAnd: return nir_op_iand;
++ case SpvOpSelect: return nir_op_bcsel;
++ case SpvOpIEqual: return nir_op_ieq;
++
++ case SpvOpBitFieldInsert: return nir_op_bitfield_insert;
++ case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract;
++ case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract;
++ case SpvOpBitReverse: return nir_op_bitfield_reverse;
++ case SpvOpBitCount: return nir_op_bit_count;
++
++ /* Comparisons: (TODO: How do we want to handle ordered/unordered?) */
++ case SpvOpFOrdEqual: return nir_op_feq;
++ case SpvOpFUnordEqual: return nir_op_feq;
++ case SpvOpINotEqual: return nir_op_ine;
++ case SpvOpFOrdNotEqual: return nir_op_fne;
++ case SpvOpFUnordNotEqual: return nir_op_fne;
++ case SpvOpULessThan: return nir_op_ult;
++ case SpvOpSLessThan: return nir_op_ilt;
++ case SpvOpFOrdLessThan: return nir_op_flt;
++ case SpvOpFUnordLessThan: return nir_op_flt;
++ case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
++ case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
++ case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
++ case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
++ case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
++ case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
++ case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
++ case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
++ case SpvOpUGreaterThanEqual: return nir_op_uge;
++ case SpvOpSGreaterThanEqual: return nir_op_ige;
++ case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
++ case SpvOpFUnordGreaterThanEqual: return nir_op_fge;
++
++ /* Conversions: */
++ case SpvOpConvertFToU: return nir_op_f2u;
++ case SpvOpConvertFToS: return nir_op_f2i;
++ case SpvOpConvertSToF: return nir_op_i2f;
++ case SpvOpConvertUToF: return nir_op_u2f;
++ case SpvOpBitcast: return nir_op_imov;
++ case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
++ /* TODO: NIR is 32-bit only; these are no-ops. */
++ case SpvOpUConvert: return nir_op_imov;
++ case SpvOpSConvert: return nir_op_imov;
++ case SpvOpFConvert: return nir_op_fmov;
++
++ /* Derivatives: */
++ case SpvOpDPdx: return nir_op_fddx;
++ case SpvOpDPdy: return nir_op_fddy;
++ case SpvOpDPdxFine: return nir_op_fddx_fine;
++ case SpvOpDPdyFine: return nir_op_fddy_fine;
++ case SpvOpDPdxCoarse: return nir_op_fddx_coarse;
++ case SpvOpDPdyCoarse: return nir_op_fddy_coarse;
++
++ default:
++ unreachable("No NIR equivalent");
++ }
++}
++
++void
++vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ const struct glsl_type *type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++
++ /* Collect the various SSA sources */
++ const unsigned num_inputs = count - 3;
++ struct vtn_ssa_value *vtn_src[4] = { NULL, };
++ for (unsigned i = 0; i < num_inputs; i++)
++ vtn_src[i] = vtn_ssa_value(b, w[i + 3]);
++
++ if (glsl_type_is_matrix(vtn_src[0]->type) ||
++ (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
++ vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
++ return;
++ }
++
++ val->ssa = vtn_create_ssa_value(b, type);
++ nir_ssa_def *src[4] = { NULL, };
++ for (unsigned i = 0; i < num_inputs; i++) {
++ assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
++ src[i] = vtn_src[i]->def;
++ }
++
++ switch (opcode) {
++ case SpvOpAny:
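++ /* Any(v): true if any component of v is true. For vectors this maps to
++ * bany_inequalN(v, false), i.e. "some component differs from false".
++ */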
++ if (src[0]->num_components == 1) {
++ val->ssa->def = nir_imov(&b->nb, src[0]);
++ } else {
++ nir_op op;
++ switch (src[0]->num_components) {
++ case 2: op = nir_op_bany_inequal2; break;
++ case 3: op = nir_op_bany_inequal3; break;
++ case 4: op = nir_op_bany_inequal4; break;
++ }
++ val->ssa->def = nir_build_alu(&b->nb, op, src[0],
++ nir_imm_int(&b->nb, NIR_FALSE),
++ NULL, NULL);
++ }
++ return;
++
++ case SpvOpAll:
++ if (src[0]->num_components == 1) {
++ val->ssa->def = nir_imov(&b->nb, src[0]);
++ } else {
++ nir_op op;
++ switch (src[0]->num_components) {
++ case 2: op = nir_op_ball_iequal2; break;
++ case 3: op = nir_op_ball_iequal3; break;
++ case 4: op = nir_op_ball_iequal4; break;
++ }
++ val->ssa->def = nir_build_alu(&b->nb, op, src[0],
++ nir_imm_int(&b->nb, NIR_TRUE),
++ NULL, NULL);
++ }
++ return;
++
++ case SpvOpOuterProduct: {
++ for (unsigned i = 0; i < src[1]->num_components; i++) {
++ val->ssa->elems[i]->def =
++ nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
++ }
++ return;
++ }
++
++ case SpvOpDot:
++ val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
++ return;
++
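++ /* The extended-arithmetic opcodes return a two-member struct: member 0 is
++ * the low 32 bits of the result and member 1 the carry/borrow or high
++ * bits, hence the elems[0]/elems[1] stores below.
++ */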
++ case SpvOpIAddCarry:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpISubBorrow:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpUMulExtended:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpSMulExtended:
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
++ val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpFwidth:
++ val->ssa->def = nir_fadd(&b->nb,
++ nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
++ nir_fabs(&b->nb, nir_fddx(&b->nb, src[1])));
++ return;
++ case SpvOpFwidthFine:
++ val->ssa->def = nir_fadd(&b->nb,
++ nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
++ nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1])));
++ return;
++ case SpvOpFwidthCoarse:
++ val->ssa->def = nir_fadd(&b->nb,
++ nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
++ nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1])));
++ return;
++
++ case SpvOpVectorTimesScalar:
++ /* The builder will take care of splatting for us. */
++ val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
++ return;
++
++ case SpvOpIsNan:
++ val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
++ return;
++
++ case SpvOpIsInf:
++ val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]),
++ nir_imm_float(&b->nb, INFINITY));
++ return;
++
++ default: {
++ bool swap;
++ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
++
++ if (swap) {
++ nir_ssa_def *tmp = src[0];
++ src[0] = src[1];
++ src[1] = tmp;
++ }
++
++ val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
++ return;
++ } /* default */
++ }
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "vtn_private.h"
++#include "nir/nir_vla.h"
++
++static bool
++vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpFunction: {
++ assert(b->func == NULL);
++ b->func = rzalloc(b, struct vtn_function);
++
++ list_inithead(&b->func->body);
++ b->func->control = w[3];
++
++ const struct glsl_type *result_type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function);
++ val->func = b->func;
++
++ const struct glsl_type *func_type =
++ vtn_value(b, w[4], vtn_value_type_type)->type->type;
++
++ assert(glsl_get_function_return_type(func_type) == result_type);
++
++ nir_function *func =
++ nir_function_create(b->shader, ralloc_strdup(b->shader, val->name));
++
++ func->num_params = glsl_get_length(func_type);
++ func->params = ralloc_array(b->shader, nir_parameter, func->num_params);
++ for (unsigned i = 0; i < func->num_params; i++) {
++ const struct glsl_function_param *param =
++ glsl_get_function_param(func_type, i);
++ func->params[i].type = param->type;
++ if (param->in) {
++ if (param->out) {
++ func->params[i].param_type = nir_parameter_inout;
++ } else {
++ func->params[i].param_type = nir_parameter_in;
++ }
++ } else {
++ if (param->out) {
++ func->params[i].param_type = nir_parameter_out;
++ } else {
++ assert(!"Parameter is neither in nor out");
++ }
++ }
++ }
++
++ func->return_type = glsl_get_function_return_type(func_type);
++
++ b->func->impl = nir_function_impl_create(func);
++ if (!glsl_type_is_void(func->return_type)) {
++ b->func->impl->return_var =
++ nir_local_variable_create(b->func->impl, func->return_type, "ret");
++ }
++
++ b->func_param_idx = 0;
++ break;
++ }
++
++ case SpvOpFunctionEnd:
++ b->func->end = w;
++ b->func = NULL;
++ break;
++
++ case SpvOpFunctionParameter: {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++
++ assert(b->func_param_idx < b->func->impl->num_params);
++ unsigned idx = b->func_param_idx++;
++
++ nir_variable *param =
++ nir_local_variable_create(b->func->impl,
++ b->func->impl->function->params[idx].type,
++ val->name);
++ b->func->impl->params[idx] = param;
++
++ struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable);
++ vtn_var->mode = vtn_variable_mode_param;
++ vtn_var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ vtn_var->var = param;
++ vtn_var->chain.var = vtn_var;
++ vtn_var->chain.length = 0;
++
++ val->access_chain = &vtn_var->chain;
++ break;
++ }
++
++ case SpvOpLabel: {
++ assert(b->block == NULL);
++ b->block = rzalloc(b, struct vtn_block);
++ b->block->node.type = vtn_cf_node_type_block;
++ b->block->label = w;
++ vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block;
++
++ if (b->func->start_block == NULL) {
++ /* This is the first block encountered for this function. In this
++ * case, we set the start block and add it to the list of
++ * implemented functions that we'll walk later.
++ */
++ b->func->start_block = b->block;
++ exec_list_push_tail(&b->functions, &b->func->node);
++ }
++ break;
++ }
++
++ case SpvOpSelectionMerge:
++ case SpvOpLoopMerge:
++ assert(b->block && b->block->merge == NULL);
++ b->block->merge = w;
++ break;
++
++ case SpvOpBranch:
++ case SpvOpBranchConditional:
++ case SpvOpSwitch:
++ case SpvOpKill:
++ case SpvOpReturn:
++ case SpvOpReturnValue:
++ case SpvOpUnreachable:
++ assert(b->block && b->block->branch == NULL);
++ b->block->branch = w;
++ b->block = NULL;
++ break;
++
++ default:
++ /* Continue on as per normal */
++ return true;
++ }
++
++ return true;
++}
++
++static void
++vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch,
++ struct vtn_block *break_block,
++ uint32_t block_id, uint32_t val, bool is_default)
++{
++ struct vtn_block *case_block =
++ vtn_value(b, block_id, vtn_value_type_block)->block;
++
++ /* Don't create dummy cases that just break */
++ if (case_block == break_block)
++ return;
++
++ if (case_block->switch_case == NULL) {
++ struct vtn_case *c = ralloc(b, struct vtn_case);
++
++ list_inithead(&c->body);
++ c->start_block = case_block;
++ c->fallthrough = NULL;
++ nir_array_init(&c->values, b);
++ c->is_default = false;
++ c->visited = false;
++
++ list_addtail(&c->link, &swtch->cases);
++
++ case_block->switch_case = c;
++ }
++
++ if (is_default) {
++ case_block->switch_case->is_default = true;
++ } else {
++ nir_array_add(&case_block->switch_case->values, uint32_t, val);
++ }
++}
++
++/* This function performs a depth-first search of the cases and puts them
++ * in fall-through order.
++ */
++static void
++vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse)
++{
++ if (cse->visited)
++ return;
++
++ cse->visited = true;
++
++ list_del(&cse->link);
++
++ if (cse->fallthrough) {
++ vtn_order_case(swtch, cse->fallthrough);
++
++ /* If we have a fall-through, place this case right before the case it
++ * falls through to. This ensures that fallthroughs come one after
++ * the other. These two can never get separated because that would
++ * imply something else falling through to the same case. Also, this
++ * can't break ordering because the DFS ensures that this case is
++ * visited before anything that falls through to it.
++ */
++ list_addtail(&cse->link, &cse->fallthrough->link);
++ } else {
++ list_add(&cse->link, &swtch->cases);
++ }
++}
++
++static enum vtn_branch_type
++vtn_get_branch_type(struct vtn_block *block,
++ struct vtn_case *swcase, struct vtn_block *switch_break,
++ struct vtn_block *loop_break, struct vtn_block *loop_cont)
++{
++ if (block->switch_case) {
++ /* This branch is actually a fallthrough */
++ assert(swcase->fallthrough == NULL ||
++ swcase->fallthrough == block->switch_case);
++ swcase->fallthrough = block->switch_case;
++ return vtn_branch_type_switch_fallthrough;
++ } else if (block == switch_break) {
++ return vtn_branch_type_switch_break;
++ } else if (block == loop_break) {
++ return vtn_branch_type_loop_break;
++ } else if (block == loop_cont) {
++ return vtn_branch_type_loop_continue;
++ } else {
++ return vtn_branch_type_none;
++ }
++}
++
++static void
++vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list,
++ struct vtn_block *start, struct vtn_case *switch_case,
++ struct vtn_block *switch_break,
++ struct vtn_block *loop_break, struct vtn_block *loop_cont,
++ struct vtn_block *end)
++{
++ struct vtn_block *block = start;
++ while (block != end) {
++ if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge &&
++ !block->loop) {
++ struct vtn_loop *loop = ralloc(b, struct vtn_loop);
++
++ loop->node.type = vtn_cf_node_type_loop;
++ list_inithead(&loop->body);
++ list_inithead(&loop->cont_body);
++ loop->control = block->merge[3];
++
++ list_addtail(&loop->node.link, cf_list);
++ block->loop = loop;
++
++ struct vtn_block *new_loop_break =
++ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++ struct vtn_block *new_loop_cont =
++ vtn_value(b, block->merge[2], vtn_value_type_block)->block;
++
++ /* Note: This recursive call will start with the current block as
++ * its start block. If we weren't careful, we would get here
++ * again and end up in infinite recursion. This is why we set
++ * block->loop above and check for it before creating one. This
++ * way, we only create the loop once and the second call that
++ * tries to handle this loop goes to the cases below and gets
++ * handled as a regular block.
++ *
++ * Note: When we make the recursive walk calls, we pass NULL for
++ * the switch break since you have to break out of the loop first.
++ * We do, however, still pass the current switch case because it's
++ * possible that the merge block for the loop is the start of
++ * another case.
++ */
++ vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL,
++ new_loop_break, new_loop_cont, NULL);
++ vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL,
++ new_loop_break, NULL, block);
++
++ block = new_loop_break;
++ continue;
++ }
++
++ assert(block->node.link.next == NULL);
++ list_addtail(&block->node.link, cf_list);
++
++ switch (*block->branch & SpvOpCodeMask) {
++ case SpvOpBranch: {
++ struct vtn_block *branch_block =
++ vtn_value(b, block->branch[1], vtn_value_type_block)->block;
++
++ block->branch_type = vtn_get_branch_type(branch_block,
++ switch_case, switch_break,
++ loop_break, loop_cont);
++
++ if (block->branch_type != vtn_branch_type_none)
++ return;
++
++ block = branch_block;
++ continue;
++ }
++
++ case SpvOpReturn:
++ case SpvOpReturnValue:
++ block->branch_type = vtn_branch_type_return;
++ return;
++
++ case SpvOpKill:
++ block->branch_type = vtn_branch_type_discard;
++ return;
++
++ case SpvOpBranchConditional: {
++ struct vtn_block *then_block =
++ vtn_value(b, block->branch[2], vtn_value_type_block)->block;
++ struct vtn_block *else_block =
++ vtn_value(b, block->branch[3], vtn_value_type_block)->block;
++
++ struct vtn_if *if_stmt = ralloc(b, struct vtn_if);
++
++ if_stmt->node.type = vtn_cf_node_type_if;
++ if_stmt->condition = block->branch[1];
++ list_inithead(&if_stmt->then_body);
++ list_inithead(&if_stmt->else_body);
++
++ list_addtail(&if_stmt->node.link, cf_list);
++
++ if (block->merge &&
++ (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) {
++ if_stmt->control = block->merge[2];
++ }
++
++ if_stmt->then_type = vtn_get_branch_type(then_block,
++ switch_case, switch_break,
++ loop_break, loop_cont);
++ if_stmt->else_type = vtn_get_branch_type(else_block,
++ switch_case, switch_break,
++ loop_break, loop_cont);
++
++ if (if_stmt->then_type == vtn_branch_type_none &&
++ if_stmt->else_type == vtn_branch_type_none) {
++ /* Neither side of the if is something we can short-circuit. */
++ assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
++ struct vtn_block *merge_block =
++ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++
++ vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block,
++ switch_case, switch_break,
++ loop_break, loop_cont, merge_block);
++ vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block,
++ switch_case, switch_break,
++ loop_break, loop_cont, merge_block);
++
++ enum vtn_branch_type merge_type =
++ vtn_get_branch_type(merge_block, switch_case, switch_break,
++ loop_break, loop_cont);
++ if (merge_type == vtn_branch_type_none) {
++ block = merge_block;
++ continue;
++ } else {
++ return;
++ }
++ } else if (if_stmt->then_type != vtn_branch_type_none &&
++ if_stmt->else_type != vtn_branch_type_none) {
++ /* Both sides were short-circuited. We're done here. */
++ return;
++ } else {
++ /* Exactly one side of the branch could be short-circuited.
++ * We set the branch up as a predicated break/continue and we
++ * continue on with the other side as if it were what comes
++ * after the if.
++ */
++ if (if_stmt->then_type == vtn_branch_type_none) {
++ block = then_block;
++ } else {
++ block = else_block;
++ }
++ continue;
++ }
++ unreachable("Should have returned or continued");
++ }
++
++ case SpvOpSwitch: {
++ assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
++ struct vtn_block *break_block =
++ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++
++ struct vtn_switch *swtch = ralloc(b, struct vtn_switch);
++
++ swtch->node.type = vtn_cf_node_type_switch;
++ swtch->selector = block->branch[1];
++ list_inithead(&swtch->cases);
++
++ list_addtail(&swtch->node.link, cf_list);
++
++ /* First, we go through and record all of the cases. */
++ const uint32_t *branch_end =
++ block->branch + (block->branch[0] >> SpvWordCountShift);
++
++ vtn_add_case(b, swtch, break_block, block->branch[2], 0, true);
++ for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2)
++ vtn_add_case(b, swtch, break_block, w[1], w[0], false);
++
++ /* Now, we go through and walk the blocks. While we walk through
++ * the blocks, we also gather the much-needed fall-through
++ * information.
++ */
++ list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) {
++ assert(cse->start_block != break_block);
++ vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse,
++ break_block, NULL, loop_cont, NULL);
++ }
++
++ /* Finally, we walk over all of the cases one more time and put
++ * them in fall-through order.
++ */
++ for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) {
++ struct vtn_block *case_block =
++ vtn_value(b, *w, vtn_value_type_block)->block;
++
++ if (case_block == break_block)
++ continue;
++
++ assert(case_block->switch_case);
++
++ vtn_order_case(swtch, case_block->switch_case);
++ }
++
++ block = break_block;
++ continue;
++ }
++
++ case SpvOpUnreachable:
++ return;
++
++ default:
++ unreachable("Unhandled opcode");
++ }
++ }
++}
++
++void
++vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end)
++{
++ vtn_foreach_instruction(b, words, end,
++ vtn_cfg_handle_prepass_instruction);
++
++ foreach_list_typed(struct vtn_function, func, node, &b->functions) {
++ vtn_cfg_walk_blocks(b, &func->body, func->start_block,
++ NULL, NULL, NULL, NULL, NULL);
++ }
++}
++
++static bool
++vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ if (opcode == SpvOpLabel)
++ return true; /* Nothing to do */
++
++ /* If this isn't a phi node, stop. */
++ if (opcode != SpvOpPhi)
++ return false;
++
++ /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot.
++ * For each phi, we create a variable with the appropriate type and
++ * do a load from that variable. Then, in a second pass, we add
++ * stores to that variable to each of the predecessor blocks.
++ *
++ * We could do something more intelligent here. However, in order to
++ * handle loops and things properly, we really need dominance
++ * information. It would end up basically being the into-SSA
++ * algorithm all over again. It's easier if we just let
++ * lower_vars_to_ssa do that for us instead of repeating it here.
++ */
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++
++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
++ nir_variable *phi_var =
++ nir_local_variable_create(b->nb.impl, type->type, "phi");
++ _mesa_hash_table_insert(b->phi_table, w, phi_var);
++
++ val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var));
++
++ return true;
++}
++
++static bool
++vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ if (opcode != SpvOpPhi)
++ return true;
++
++ struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w);
++ assert(phi_entry);
++ nir_variable *phi_var = phi_entry->data;
++
++ for (unsigned i = 3; i < count; i += 2) {
++ struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
++ struct vtn_block *pred =
++ vtn_value(b, w[i + 1], vtn_value_type_block)->block;
++
++ b->nb.cursor = nir_after_block_before_jump(pred->end_block);
++
++ vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
++ }
++
++ return true;
++}
++
++static void
++vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type,
++ nir_variable *switch_fall_var, bool *has_switch_break)
++{
++ switch (branch_type) {
++ case vtn_branch_type_switch_break:
++ nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
++ *has_switch_break = true;
++ break;
++ case vtn_branch_type_switch_fallthrough:
++ break; /* Nothing to do */
++ case vtn_branch_type_loop_break:
++ nir_jump(&b->nb, nir_jump_break);
++ break;
++ case vtn_branch_type_loop_continue:
++ nir_jump(&b->nb, nir_jump_continue);
++ break;
++ case vtn_branch_type_return:
++ nir_jump(&b->nb, nir_jump_return);
++ break;
++ case vtn_branch_type_discard: {
++ nir_intrinsic_instr *discard =
++ nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard);
++ nir_builder_instr_insert(&b->nb, &discard->instr);
++ break;
++ }
++ default:
++ unreachable("Invalid branch type");
++ }
++}
++
++static void
++vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
++ nir_variable *switch_fall_var, bool *has_switch_break,
++ vtn_instruction_handler handler)
++{
++ list_for_each_entry(struct vtn_cf_node, node, cf_list, link) {
++ switch (node->type) {
++ case vtn_cf_node_type_block: {
++ struct vtn_block *block = (struct vtn_block *)node;
++
++ const uint32_t *block_start = block->label;
++ const uint32_t *block_end = block->merge ? block->merge :
++ block->branch;
++
++ block_start = vtn_foreach_instruction(b, block_start, block_end,
++ vtn_handle_phis_first_pass);
++
++ vtn_foreach_instruction(b, block_start, block_end, handler);
++
++ block->end_block = nir_cursor_current_block(b->nb.cursor);
++
++ if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
++ struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
++ vtn_local_store(b, src,
++ nir_deref_var_create(b, b->impl->return_var));
++ }
++
++ if (block->branch_type != vtn_branch_type_none) {
++ vtn_emit_branch(b, block->branch_type,
++ switch_fall_var, has_switch_break);
++ }
++
++ break;
++ }
++
++ case vtn_cf_node_type_if: {
++ struct vtn_if *vtn_if = (struct vtn_if *)node;
++
++ nir_if *if_stmt = nir_if_create(b->shader);
++ if_stmt->condition =
++ nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def);
++ nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
++
++ bool sw_break = false;
++
++ b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
++ if (vtn_if->then_type == vtn_branch_type_none) {
++ vtn_emit_cf_list(b, &vtn_if->then_body,
++ switch_fall_var, &sw_break, handler);
++ } else {
++ vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break);
++ }
++
++ b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
++ if (vtn_if->else_type == vtn_branch_type_none) {
++ vtn_emit_cf_list(b, &vtn_if->else_body,
++ switch_fall_var, &sw_break, handler);
++ } else {
++ vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break);
++ }
++
++ b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
++
++ /* If we encountered a switch break somewhere inside of the if,
++ * then it would have been handled correctly by calling
++ * emit_cf_list or emit_branch for the interior. However, we
++ * need to predicate everything following on whether or not we're
++ * still going.
++ */
++ if (sw_break) {
++ *has_switch_break = true;
++
++ nir_if *switch_if = nir_if_create(b->shader);
++ switch_if->condition =
++ nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var));
++ nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node);
++
++ b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
++ }
++ break;
++ }
++
++ case vtn_cf_node_type_loop: {
++ struct vtn_loop *vtn_loop = (struct vtn_loop *)node;
++
++ nir_loop *loop = nir_loop_create(b->shader);
++ nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
++
++ b->nb.cursor = nir_after_cf_list(&loop->body);
++ vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler);
++
++ if (!list_empty(&vtn_loop->cont_body)) {
++ /* If we have a non-trivial continue body then we need to put
++ * it at the beginning of the loop with a flag to ensure that
++ * it doesn't get executed in the first iteration.
++ */
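++ /* The emitted structure is roughly:
++ * cont = false;
++ * loop {
++ * if (cont) { <continue body> }
++ * cont = true;
++ * <loop body>
++ * }
++ */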
++ nir_variable *do_cont =
++ nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont");
++
++ b->nb.cursor = nir_before_cf_node(&loop->cf_node);
++ nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1);
++
++ b->nb.cursor = nir_before_cf_list(&loop->body);
++ nir_if *cont_if = nir_if_create(b->shader);
++ cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont));
++ nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node);
++
++ b->nb.cursor = nir_after_cf_list(&cont_if->then_list);
++ vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler);
++
++ b->nb.cursor = nir_after_cf_node(&cont_if->cf_node);
++ nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1);
++
++ b->has_loop_continue = true;
++ }
++
++ b->nb.cursor = nir_after_cf_node(&loop->cf_node);
++ break;
++ }
++
++ case vtn_cf_node_type_switch: {
++ struct vtn_switch *vtn_switch = (struct vtn_switch *)node;
++
++ /* First, we create a variable to keep track of whether or not the
++ * switch is still going at any given point. Any switch breaks
++ * will set this variable to false.
++ */
++ nir_variable *fall_var =
++ nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall");
++ nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
++
++ /* Next, we gather up all of the conditions. We have to do this
++ * up-front because we also need to build an "any" condition so
++ * that we can use !any for default.
++ */
++ const int num_cases = list_length(&vtn_switch->cases);
++ NIR_VLA(nir_ssa_def *, conditions, num_cases);
++
++ nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def;
++ /* An accumulation of all conditions. Used for the default */
++ nir_ssa_def *any = NULL;
++
++ int i = 0;
++ list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
++ if (cse->is_default) {
++ conditions[i++] = NULL;
++ continue;
++ }
++
++ nir_ssa_def *cond = NULL;
++ nir_array_foreach(&cse->values, uint32_t, val) {
++ nir_ssa_def *is_val =
++ nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val));
++
++ cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val;
++ }
++
++ any = any ? nir_ior(&b->nb, any, cond) : cond;
++ conditions[i++] = cond;
++ }
++ assert(i == num_cases);
++
++ /* Now we can walk the list of cases and actually emit code */
++ i = 0;
++ list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
++ /* Figure out the condition */
++ nir_ssa_def *cond = conditions[i++];
++ if (cse->is_default) {
++ assert(cond == NULL);
++ cond = nir_inot(&b->nb, any);
++ }
++ /* Take fallthrough into account */
++ cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var));
++
++ nir_if *case_if = nir_if_create(b->nb.shader);
++ case_if->condition = nir_src_for_ssa(cond);
++ nir_cf_node_insert(b->nb.cursor, &case_if->cf_node);
++
++ bool has_break = false;
++ b->nb.cursor = nir_after_cf_list(&case_if->then_list);
++ nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1);
++ vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler);
++ (void)has_break; /* We don't care */
++
++ b->nb.cursor = nir_after_cf_node(&case_if->cf_node);
++ }
++ assert(i == num_cases);
++
++ break;
++ }
++
++ default:
++ unreachable("Invalid CF node type");
++ }
++ }
++}
++
++void
++vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
++ vtn_instruction_handler instruction_handler)
++{
++ nir_builder_init(&b->nb, func->impl);
++ b->nb.cursor = nir_after_cf_list(&func->impl->body);
++ b->has_loop_continue = false;
++ b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
++ _mesa_key_pointer_equal);
++
++ vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler);
++
++ vtn_foreach_instruction(b, func->start_block->label, func->end,
++ vtn_handle_phi_second_pass);
++
++ /* Continue blocks for loops get inserted before the body of the loop
++ * but instructions in the continue may use SSA defs in the loop body.
++ * Therefore, we need to repair SSA to insert the needed phi nodes.
++ */
++ if (b->has_loop_continue)
++ nir_repair_ssa_impl(func->impl);
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++#include "GLSL.std.450.h"
++
++#define M_PIf ((float) M_PI)
++#define M_PI_2f ((float) M_PI_2)
++#define M_PI_4f ((float) M_PI_4)
++
++static nir_ssa_def *
++build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
++{
++ unsigned swiz[4] = {1, 0, 0, 0};
++ nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
++ return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
++}
++
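++/* The 3x3 determinant is the scalar triple product dot(col0, cross(col1, col2)),
++ * written out with yzx/zxy swizzles so it maps directly onto vector ALU ops.
++ */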
++static nir_ssa_def *
++build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
++{
++ unsigned yzx[4] = {1, 2, 0, 0};
++ unsigned zxy[4] = {2, 0, 1, 0};
++
++ nir_ssa_def *prod0 =
++ nir_fmul(b, col[0],
++ nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
++ nir_swizzle(b, col[2], zxy, 3, true)));
++ nir_ssa_def *prod1 =
++ nir_fmul(b, col[0],
++ nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
++ nir_swizzle(b, col[2], yzx, 3, true)));
++
++ nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
++
++ return nir_fadd(b, nir_channel(b, diff, 0),
++ nir_fadd(b, nir_channel(b, diff, 1),
++ nir_channel(b, diff, 2)));
++}
++
++static nir_ssa_def *
++build_mat4_det(nir_builder *b, nir_ssa_def **col)
++{
++ nir_ssa_def *subdet[4];
++ for (unsigned i = 0; i < 4; i++) {
++ unsigned swiz[3];
++ for (unsigned j = 0, k = 0; j < 3; j++, k++) {
++ if (k == i)
++ k++; /* skip column */
++ swiz[j] = k;
++ }
++
++ nir_ssa_def *subcol[3];
++ subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
++ subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
++ subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
++
++ subdet[i] = build_mat3_det(b, subcol);
++ }
++
++ nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
++
++ return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
++ nir_channel(b, prod, 1)),
++ nir_fsub(b, nir_channel(b, prod, 2),
++ nir_channel(b, prod, 3)));
++}
++
++static nir_ssa_def *
++build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++ unsigned size = glsl_get_vector_elements(src->type);
++
++ nir_ssa_def *cols[4];
++ for (unsigned i = 0; i < size; i++)
++ cols[i] = src->elems[i]->def;
++
++ switch(size) {
++ case 2: return build_mat2_det(&b->nb, cols);
++ case 3: return build_mat3_det(&b->nb, cols);
++ case 4: return build_mat4_det(&b->nb, cols);
++ default:
++ unreachable("Invalid matrix size");
++ }
++}
++
++/* Computes the determinant of the submatrix given by taking src and
++ * removing the specified row and column.
++ */
++static nir_ssa_def *
++build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
++ unsigned size, unsigned row, unsigned col)
++{
++ assert(row < size && col < size);
++ if (size == 2) {
++ return nir_channel(b, src->elems[1 - col]->def, 1 - row);
++ } else {
++ /* Swizzle to get all but the specified row */
++ unsigned swiz[3];
++ for (unsigned j = 0; j < 4; j++)
++ swiz[j - (j > row)] = j;
++
++ /* Grab all but the specified column */
++ nir_ssa_def *subcol[3];
++ for (unsigned j = 0; j < size; j++) {
++ if (j != col) {
++ subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
++ swiz, size - 1, true);
++ }
++ }
++
++ if (size == 3) {
++ return build_mat2_det(b, subcol);
++ } else {
++ assert(size == 4);
++ return build_mat3_det(b, subcol);
++ }
++ }
++}
++
++static struct vtn_ssa_value *
++matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++ nir_ssa_def *adj_col[4];
++ unsigned size = glsl_get_vector_elements(src->type);
++
++ /* Build up an adjugate matrix */
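++ /* adj(M) is the transpose of the cofactor matrix, which is why the row and
++ * column passed to build_mat_subdet() are swapped relative to the element
++ * being written; the inverse is then adj(M) * (1 / det(M)).
++ */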
++ for (unsigned c = 0; c < size; c++) {
++ nir_ssa_def *elem[4];
++ for (unsigned r = 0; r < size; r++) {
++ elem[r] = build_mat_subdet(&b->nb, src, size, c, r);
++
++ if ((r + c) % 2)
++ elem[r] = nir_fneg(&b->nb, elem[r]);
++ }
++
++ adj_col[c] = nir_vec(&b->nb, elem, size);
++ }
++
++ nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));
++
++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
++ for (unsigned i = 0; i < size; i++)
++ val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);
++
++ return val;
++}
++
++static nir_ssa_def*
++build_length(nir_builder *b, nir_ssa_def *vec)
++{
++ switch (vec->num_components) {
++ case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
++ case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
++ case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
++ case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
++ default:
++ unreachable("Invalid number of components");
++ }
++}
++
++static inline nir_ssa_def *
++build_fclamp(nir_builder *b,
++ nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
++{
++ return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
++}
++
++/**
++ * Return e^x.
++ */
++static nir_ssa_def *
++build_exp(nir_builder *b, nir_ssa_def *x)
++{
++ return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
++}
++
++/**
++ * Return ln(x) - the natural logarithm of x.
++ */
++static nir_ssa_def *
++build_log(nir_builder *b, nir_ssa_def *x)
++{
++ return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
++}
++
++static nir_ssa_def *
++build_asin(nir_builder *b, nir_ssa_def *x)
++{
++ /*
++ * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x| * (pi/4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))))
++ */
++ nir_ssa_def *abs_x = nir_fabs(b, x);
++ return nir_fmul(b, nir_fsign(b, x),
++ nir_fsub(b, nir_imm_float(b, M_PI_2f),
++ nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
++ nir_fadd(b, nir_imm_float(b, M_PI_2f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, 0.086566724f),
++ nir_fmul(b, abs_x,
++ nir_imm_float(b, -0.03102955f))))))))));
++}
++
++static nir_ssa_def *
++build_acos(nir_builder *b, nir_ssa_def *x)
++{
++ /*
++ * poly(x) = sqrt(1 - |x|) * (pi/2 + |x| * (pi/4 - 1 + |x| * (0.08132463 + |x| * -0.02363318)))
++ * acos(x) = x < 0 ? pi - poly(x) : poly(x)
++ */
++ nir_ssa_def *abs_x = nir_fabs(b, x);
++ nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
++ nir_fadd(b, nir_imm_float(b, M_PI_2f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
++ nir_fmul(b, abs_x,
++ nir_fadd(b, nir_imm_float(b, 0.08132463f),
++ nir_fmul(b, abs_x,
++ nir_imm_float(b, -0.02363318f))))))));
++ return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)),
++ nir_fsub(b, nir_imm_float(b, M_PI), poly),
++ poly);
++}
++
++/**
++ * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
++ */
++static nir_ssa_def *
++build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
++{
++ nir_ssa_def *accum = xs[0];
++
++ for (int i = 1; i < terms; i++)
++ accum = nir_fadd(b, accum, xs[i]);
++
++ return accum;
++}
++
++static nir_ssa_def *
++build_atan(nir_builder *b, nir_ssa_def *y_over_x)
++{
++ nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
++ nir_ssa_def *one = nir_imm_float(b, 1.0f);
++
++ /*
++ * range-reduction, first step:
++ *
++ * / y_over_x if |y_over_x| <= 1.0;
++ * x = <
++ * \ 1.0 / y_over_x otherwise
++ */
++ nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
++ nir_fmax(b, abs_y_over_x, one));
++
++ /*
++ * approximate atan by evaluating polynomial:
++ *
++ * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
++ * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
++ * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
++ */
++ nir_ssa_def *x_2 = nir_fmul(b, x, x);
++ nir_ssa_def *x_3 = nir_fmul(b, x_2, x);
++ nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2);
++ nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2);
++ nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
++ nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
++
++ nir_ssa_def *polynomial_terms[] = {
++ nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
++ nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
++ nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
++ nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
++ nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
++ nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
++ };
++
++ nir_ssa_def *tmp =
++ build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
++
++ /* range-reduction fixup */
++ tmp = nir_fadd(b, tmp,
++ nir_fmul(b,
++ nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
++ nir_fadd(b, nir_fmul(b, tmp,
++ nir_imm_float(b, -2.0f)),
++ nir_imm_float(b, M_PI_2f))));
++
++ /* sign fixup */
++ return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
++}
++
++static nir_ssa_def *
++build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
++{
++ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
++
++ /* If |x| >= 1.0e-8 * |y|: */
++ nir_ssa_def *condition =
++ nir_fge(b, nir_fabs(b, x),
++ nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
++
++ /* Then...call atan(y/x) and fix it up: */
++ nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
++ nir_ssa_def *r_then =
++ nir_bcsel(b, nir_flt(b, x, zero),
++ nir_fadd(b, atan1,
++ nir_bcsel(b, nir_fge(b, y, zero),
++ nir_imm_float(b, M_PIf),
++ nir_imm_float(b, -M_PIf))),
++ atan1);
++
++ /* Else... */
++ nir_ssa_def *r_else =
++ nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
++
++ return nir_bcsel(b, condition, r_then, r_else);
++}
++
++static nir_ssa_def *
++build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
++{
++ nir_ssa_def *abs_x = nir_fabs(b, x);
++ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
++
++ /* Single-precision floating-point values are stored as
++ * 1 sign bit;
++ * 8 exponent bits;
++ * 23 mantissa bits.
++ *
++ * An exponent shift of 23 will shift the mantissa out, leaving only the
++ * exponent and sign bit (which itself may be zero, if the absolute value
++ * was taken before the bitcast and shift).
++ */
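++ /* Worked example (illustrative): frexp(8.0) yields a mantissa of 0.5 and
++ * an exponent of 4, since 8.0 = 0.5 * 2^4 and the mantissa lies in
++ * [0.5, 1.0).
++ */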
++ nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
++ nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
++
++ nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
++
++ /* Exponent of floating-point values in the range [0.5, 1.0). */
++ nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
++
++ nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
++
++ *exponent =
++ nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
++ nir_bcsel(b, is_not_zero, exponent_bias, zero));
++
++ return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
++ nir_bcsel(b, is_not_zero, exponent_value, zero));
++}
++
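++ /* Handles the GLSL.std.450 instructions that map to a plain NIR ALU op or
++ * to a small expression built with nir_builder. For OpExtInst, w[1] is the
++ * result type, w[2] the result id, w[3] the extended instruction set, w[4]
++ * the instruction within that set, and the operand ids start at w[5].
++ */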
++static void
++handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
++ const uint32_t *w, unsigned count)
++{
++ struct nir_builder *nb = &b->nb;
++ const struct glsl_type *dest_type =
++ vtn_value(b, w[1], vtn_value_type_type)->type->type;
++
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = vtn_create_ssa_value(b, dest_type);
++
++ /* Collect the various SSA sources */
++ unsigned num_inputs = count - 5;
++ nir_ssa_def *src[3];
++ for (unsigned i = 0; i < num_inputs; i++)
++ src[i] = vtn_ssa_value(b, w[i + 5])->def;
++
++ nir_op op;
++ switch (entrypoint) {
++ case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */
++ case GLSLstd450RoundEven: op = nir_op_fround_even; break;
++ case GLSLstd450Trunc: op = nir_op_ftrunc; break;
++ case GLSLstd450FAbs: op = nir_op_fabs; break;
++ case GLSLstd450SAbs: op = nir_op_iabs; break;
++ case GLSLstd450FSign: op = nir_op_fsign; break;
++ case GLSLstd450SSign: op = nir_op_isign; break;
++ case GLSLstd450Floor: op = nir_op_ffloor; break;
++ case GLSLstd450Ceil: op = nir_op_fceil; break;
++ case GLSLstd450Fract: op = nir_op_ffract; break;
++ case GLSLstd450Radians:
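++ /* radians(x) = x * pi / 180; 0.01745329251 ~= pi / 180 */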
++ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
++ return;
++ case GLSLstd450Degrees:
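++ /* degrees(x) = x * 180 / pi; 57.2957795131 ~= 180 / pi */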
++ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
++ return;
++ case GLSLstd450Sin: op = nir_op_fsin; break;
++ case GLSLstd450Cos: op = nir_op_fcos; break;
++ case GLSLstd450Tan:
++ val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
++ nir_fcos(nb, src[0]));
++ return;
++ case GLSLstd450Pow: op = nir_op_fpow; break;
++ case GLSLstd450Exp2: op = nir_op_fexp2; break;
++ case GLSLstd450Log2: op = nir_op_flog2; break;
++ case GLSLstd450Sqrt: op = nir_op_fsqrt; break;
++ case GLSLstd450InverseSqrt: op = nir_op_frsq; break;
++
++ case GLSLstd450Modf: {
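++ /* modf returns the (signed) fractional part and stores the (signed)
++ * whole-number part through the pointer operand in w[6].
++ */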
++ nir_ssa_def *sign = nir_fsign(nb, src[0]);
++ nir_ssa_def *abs = nir_fabs(nb, src[0]);
++ val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
++ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
++ nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
++ return;
++ }
++
++ case GLSLstd450ModfStruct: {
++ nir_ssa_def *sign = nir_fsign(nb, src[0]);
++ nir_ssa_def *abs = nir_fabs(nb, src[0]);
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
++ val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
++ return;
++ }
++
++ case GLSLstd450FMin: op = nir_op_fmin; break;
++ case GLSLstd450UMin: op = nir_op_umin; break;
++ case GLSLstd450SMin: op = nir_op_imin; break;
++ case GLSLstd450FMax: op = nir_op_fmax; break;
++ case GLSLstd450UMax: op = nir_op_umax; break;
++ case GLSLstd450SMax: op = nir_op_imax; break;
++ case GLSLstd450FMix: op = nir_op_flrp; break;
++ case GLSLstd450Step:
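++ /* step(edge, x) = 1.0 if x >= edge, else 0.0 */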
++ val->ssa->def = nir_sge(nb, src[1], src[0]);
++ return;
++
++ case GLSLstd450Fma: op = nir_op_ffma; break;
++ case GLSLstd450Ldexp: op = nir_op_ldexp; break;
++
++ /* Packing/Unpacking functions */
++ case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break;
++ case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break;
++ case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break;
++ case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break;
++ case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break;
++ case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break;
++ case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break;
++ case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break;
++ case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break;
++ case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break;
++
++ case GLSLstd450Length:
++ val->ssa->def = build_length(nb, src[0]);
++ return;
++ case GLSLstd450Distance:
++ val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
++ return;
++ case GLSLstd450Normalize:
++ val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
++ return;
++
++ case GLSLstd450Exp:
++ val->ssa->def = build_exp(nb, src[0]);
++ return;
++
++ case GLSLstd450Log:
++ val->ssa->def = build_log(nb, src[0]);
++ return;
++
++ case GLSLstd450FClamp:
++ val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
++ return;
++ case GLSLstd450UClamp:
++ val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
++ return;
++ case GLSLstd450SClamp:
++ val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
++ return;
++
++ case GLSLstd450Cross: {
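++ /* cross(a, b) = a.yzx * b.zxy - a.zxy * b.yzx */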
++ unsigned yzx[4] = { 1, 2, 0, 0 };
++ unsigned zxy[4] = { 2, 0, 1, 0 };
++ val->ssa->def =
++ nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
++ nir_swizzle(nb, src[1], zxy, 3, true)),
++ nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
++ nir_swizzle(nb, src[1], yzx, 3, true)));
++ return;
++ }
++
++ case GLSLstd450SmoothStep: {
++ /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
++ nir_ssa_def *t =
++ build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
++ nir_fsub(nb, src[1], src[0])),
++ nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
++ /* result = t * t * (3 - 2 * t) */
++ val->ssa->def =
++ nir_fmul(nb, t, nir_fmul(nb, t,
++ nir_fsub(nb, nir_imm_float(nb, 3.0),
++ nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
++ return;
++ }
++
++ case GLSLstd450FaceForward:
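++ /* faceforward(N, I, Nref) = N if dot(Nref, I) < 0, otherwise -N */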
++ val->ssa->def =
++ nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
++ nir_imm_float(nb, 0.0)),
++ src[0], nir_fneg(nb, src[0]));
++ return;
++
++ case GLSLstd450Reflect:
++ /* I - 2 * dot(N, I) * N */
++ val->ssa->def =
++ nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
++ nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
++ src[1])));
++ return;
++
++ case GLSLstd450Refract: {
++ nir_ssa_def *I = src[0];
++ nir_ssa_def *N = src[1];
++ nir_ssa_def *eta = src[2];
++ nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
++ nir_ssa_def *one = nir_imm_float(nb, 1.0);
++ nir_ssa_def *zero = nir_imm_float(nb, 0.0);
++ /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
++ nir_ssa_def *k =
++ nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
++ nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
++ nir_ssa_def *result =
++ nir_fsub(nb, nir_fmul(nb, eta, I),
++ nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
++ nir_fsqrt(nb, k)), N));
++ /* XXX: bcsel, or if statement? */
++ val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
++ return;
++ }
++
++ case GLSLstd450Sinh:
++ /* 0.5 * (e^x - e^(-x)) */
++ val->ssa->def =
++ nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fsub(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0]))));
++ return;
++
++ case GLSLstd450Cosh:
++ /* 0.5 * (e^x + e^(-x)) */
++ val->ssa->def =
++ nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fadd(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0]))));
++ return;
++
++ case GLSLstd450Tanh:
++ /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
++ val->ssa->def =
++ nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fsub(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0])))),
++ nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ nir_fadd(nb, build_exp(nb, src[0]),
++ build_exp(nb, nir_fneg(nb, src[0])))));
++ return;
++
++ case GLSLstd450Asinh:
++ val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
++ build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
++ nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
++ nir_imm_float(nb, 1.0f))))));
++ return;
++ case GLSLstd450Acosh:
++ val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
++ nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
++ nir_imm_float(nb, 1.0f)))));
++ return;
++ case GLSLstd450Atanh: {
++ nir_ssa_def *one = nir_imm_float(nb, 1.0);
++ val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
++ build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
++ nir_fsub(nb, one, src[0]))));
++ return;
++ }
++
++ case GLSLstd450FindILsb: op = nir_op_find_lsb; break;
++ case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break;
++ case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break;
++
++ case GLSLstd450Asin:
++ val->ssa->def = build_asin(nb, src[0]);
++ return;
++
++ case GLSLstd450Acos:
++ val->ssa->def = build_acos(nb, src[0]);
++ return;
++
++ case GLSLstd450Atan:
++ val->ssa->def = build_atan(nb, src[0]);
++ return;
++
++ case GLSLstd450Atan2:
++ val->ssa->def = build_atan2(nb, src[0], src[1]);
++ return;
++
++ case GLSLstd450Frexp: {
++ nir_ssa_def *exponent;
++ val->ssa->def = build_frexp(nb, src[0], &exponent);
++ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
++ return;
++ }
++
++ case GLSLstd450FrexpStruct: {
++ assert(glsl_type_is_struct(val->ssa->type));
++ val->ssa->elems[0]->def = build_frexp(nb, src[0],
++ &val->ssa->elems[1]->def);
++ return;
++ }
++
++ case GLSLstd450PackDouble2x32:
++ case GLSLstd450UnpackDouble2x32:
++ default:
++ unreachable("Unhandled opcode");
++ }
++
++ nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
++ nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
++ glsl_get_vector_elements(val->ssa->type), val->name);
++ instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
++ val->ssa->def = &instr->dest.dest.ssa;
++
++ for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
++ instr->src[i].src = nir_src_for_ssa(src[i]);
++
++ nir_builder_instr_insert(nb, &instr->instr);
++}
++
++bool
++vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch ((enum GLSLstd450)ext_opcode) {
++ case GLSLstd450Determinant: {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = rzalloc(b, struct vtn_ssa_value);
++ val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
++ val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
++ break;
++ }
++
++ case GLSLstd450MatrixInverse: {
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
++ break;
++ }
++
++ case GLSLstd450InterpolateAtCentroid:
++ case GLSLstd450InterpolateAtSample:
++ case GLSLstd450InterpolateAtOffset:
++ unreachable("Unhandled opcode");
++
++ default:
++ handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
++ }
++
++ return true;
++}
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "nir/nir.h"
++#include "nir/nir_builder.h"
++#include "nir/nir_array.h"
++#include "nir_spirv.h"
++#include "spirv.h"
++
++struct vtn_builder;
++struct vtn_decoration;
++
++enum vtn_value_type {
++ vtn_value_type_invalid = 0,
++ vtn_value_type_undef,
++ vtn_value_type_string,
++ vtn_value_type_decoration_group,
++ vtn_value_type_type,
++ vtn_value_type_constant,
++ vtn_value_type_access_chain,
++ vtn_value_type_function,
++ vtn_value_type_block,
++ vtn_value_type_ssa,
++ vtn_value_type_extension,
++ vtn_value_type_image_pointer,
++ vtn_value_type_sampled_image,
++};
++
++enum vtn_branch_type {
++ vtn_branch_type_none,
++ vtn_branch_type_switch_break,
++ vtn_branch_type_switch_fallthrough,
++ vtn_branch_type_loop_break,
++ vtn_branch_type_loop_continue,
++ vtn_branch_type_discard,
++ vtn_branch_type_return,
++};
++
++enum vtn_cf_node_type {
++ vtn_cf_node_type_block,
++ vtn_cf_node_type_if,
++ vtn_cf_node_type_loop,
++ vtn_cf_node_type_switch,
++};
++
++struct vtn_cf_node {
++ struct list_head link;
++ enum vtn_cf_node_type type;
++};
++
++struct vtn_loop {
++ struct vtn_cf_node node;
++
++ /* The main body of the loop */
++ struct list_head body;
++
++ /* The "continue" part of the loop. This gets executed after the body
++ * and is where you go when you hit a continue.
++ */
++ struct list_head cont_body;
++
++ SpvLoopControlMask control;
++};
++
++struct vtn_if {
++ struct vtn_cf_node node;
++
++ uint32_t condition;
++
++ enum vtn_branch_type then_type;
++ struct list_head then_body;
++
++ enum vtn_branch_type else_type;
++ struct list_head else_body;
++
++ SpvSelectionControlMask control;
++};
++
++struct vtn_case {
++ struct list_head link;
++
++ struct list_head body;
++
++ /* The block that starts this case */
++ struct vtn_block *start_block;
++
++ /* The fallthrough case, if any */
++ struct vtn_case *fallthrough;
++
++ /* The uint32_t values that map to this case */
++ nir_array values;
++
++ /* True if this is the default case */
++ bool is_default;
++
++ /* Initialized to false; used when sorting the list of cases */
++ bool visited;
++};
++
++struct vtn_switch {
++ struct vtn_cf_node node;
++
++ uint32_t selector;
++
++ struct list_head cases;
++};
++
++struct vtn_block {
++ struct vtn_cf_node node;
++
++ /** A pointer to the label instruction */
++ const uint32_t *label;
++
++ /** A pointer to the merge instruction (or NULL if none exists) */
++ const uint32_t *merge;
++
++ /** A pointer to the branch instruction that ends this block */
++ const uint32_t *branch;
++
++ enum vtn_branch_type branch_type;
++
++ /** Points to the loop that this block starts (if it starts a loop) */
++ struct vtn_loop *loop;
++
++ /** Points to the switch case started by this block (if any) */
++ struct vtn_case *switch_case;
++
++ /** The last NIR block generated for this SPIR-V block. */
++ nir_block *end_block;
++};
++
++struct vtn_function {
++ struct exec_node node;
++
++ nir_function_impl *impl;
++ struct vtn_block *start_block;
++
++ struct list_head body;
++
++ const uint32_t *end;
++
++ SpvFunctionControlMask control;
++};
++
++typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
++ const uint32_t *, unsigned);
++
++void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words,
++ const uint32_t *end);
++void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
++ vtn_instruction_handler instruction_handler);
++
++const uint32_t *
++vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
++ const uint32_t *end, vtn_instruction_handler handler);
++
++struct vtn_ssa_value {
++ union {
++ nir_ssa_def *def;
++ struct vtn_ssa_value **elems;
++ };
++
++ /* For matrices, if this is non-NULL, then this value is actually the
++ * transpose of some other value. The value that `transposed` points to
++ * always dominates this value.
++ */
++ struct vtn_ssa_value *transposed;
++
++ const struct glsl_type *type;
++};
++
++struct vtn_type {
++ const struct glsl_type *type;
++
++ /* The value that declares this type. Used for finding decorations */
++ struct vtn_value *val;
++
++ /* for matrices, whether the matrix is stored row-major */
++ bool row_major;
++
++ /* for structs, the offset of each member */
++ unsigned *offsets;
++
++ /* for structs, whether it was decorated as a "non-SSBO-like" block */
++ bool block;
++
++ /* for structs, whether it was decorated as an "SSBO-like" block */
++ bool buffer_block;
++
++ /* for structs with block == true, whether this is a builtin block (i.e. a
++ * block that contains only builtins).
++ */
++ bool builtin_block;
++
++ /* Image format for image_load_store type images */
++ unsigned image_format;
++
++ /* Access qualifier for storage images */
++ SpvAccessQualifier access_qualifier;
++
++ /* for arrays and matrices, the array stride */
++ unsigned stride;
++
++ /* for arrays, the vtn_type for the elements of the array */
++ struct vtn_type *array_element;
++
++ /* for structures, the vtn_type for each member */
++ struct vtn_type **members;
++
++ /* Whether this type, or a parent type, has been decorated as a builtin */
++ bool is_builtin;
++
++ SpvBuiltIn builtin;
++};
++
++struct vtn_variable;
++
++enum vtn_access_mode {
++ vtn_access_mode_id,
++ vtn_access_mode_literal,
++};
++
++struct vtn_access_link {
++ enum vtn_access_mode mode;
++ uint32_t id;
++};
++
++struct vtn_access_chain {
++ struct vtn_variable *var;
++
++ uint32_t length;
++
++ /* Struct elements and array offsets */
++ struct vtn_access_link link[0];
++};
++
++enum vtn_variable_mode {
++ vtn_variable_mode_local,
++ vtn_variable_mode_global,
++ vtn_variable_mode_param,
++ vtn_variable_mode_ubo,
++ vtn_variable_mode_ssbo,
++ vtn_variable_mode_push_constant,
++ vtn_variable_mode_image,
++ vtn_variable_mode_sampler,
++ vtn_variable_mode_workgroup,
++ vtn_variable_mode_input,
++ vtn_variable_mode_output,
++};
++
++struct vtn_variable {
++ enum vtn_variable_mode mode;
++
++ struct vtn_type *type;
++
++ unsigned descriptor_set;
++ unsigned binding;
++
++ nir_variable *var;
++ nir_variable **members;
++
++ struct vtn_access_chain chain;
++};
++
++struct vtn_image_pointer {
++ struct vtn_access_chain *image;
++ nir_ssa_def *coord;
++ nir_ssa_def *sample;
++};
++
++struct vtn_sampled_image {
++ struct vtn_access_chain *image; /* Image or array of images */
++ struct vtn_access_chain *sampler; /* Sampler */
++};
++
++struct vtn_value {
++ enum vtn_value_type value_type;
++ const char *name;
++ struct vtn_decoration *decoration;
++ union {
++ void *ptr;
++ char *str;
++ struct vtn_type *type;
++ struct {
++ nir_constant *constant;
++ const struct glsl_type *const_type;
++ };
++ struct vtn_access_chain *access_chain;
++ struct vtn_image_pointer *image;
++ struct vtn_sampled_image *sampled_image;
++ struct vtn_function *func;
++ struct vtn_block *block;
++ struct vtn_ssa_value *ssa;
++ vtn_instruction_handler ext_handler;
++ };
++};
++
++#define VTN_DEC_DECORATION -1
++#define VTN_DEC_EXECUTION_MODE -2
++#define VTN_DEC_STRUCT_MEMBER0 0
++
++struct vtn_decoration {
++ struct vtn_decoration *next;
++
++ /* Specifies how to apply this decoration. Negative values represent a
++ * decoration or execution mode. (See the VTN_DEC_ #defines above.)
++ * Non-negative values specify that it applies to a structure member.
++ */
++ int scope;
++
++ const uint32_t *literals;
++ struct vtn_value *group;
++
++ union {
++ SpvDecoration decoration;
++ SpvExecutionMode exec_mode;
++ };
++};
++
++struct vtn_builder {
++ nir_builder nb;
++
++ nir_shader *shader;
++ nir_function_impl *impl;
++ struct vtn_block *block;
++
++ /* Current file, line, and column. Useful for debugging. Set
++ * automatically by vtn_foreach_instruction.
++ */
++ char *file;
++ int line, col;
++
++ /*
++ * In SPIR-V, constants are global, whereas in NIR, the load_const
++ * instruction we use is per-function. So while we parse each function, we
++ * keep a hash table of constants we've resolved to SSA values so
++ * far, and we lazily resolve them when we see them used in a function.
++ */
++ struct hash_table *const_table;
++
++ /*
++ * Map from phi instructions (pointer to the start of the instruction)
++ * to the variable corresponding to it.
++ */
++ struct hash_table *phi_table;
++
++ unsigned num_specializations;
++ struct nir_spirv_specialization *specializations;
++
++ unsigned value_id_bound;
++ struct vtn_value *values;
++
++ gl_shader_stage entry_point_stage;
++ const char *entry_point_name;
++ struct vtn_value *entry_point;
++ bool origin_upper_left;
++
++ struct vtn_function *func;
++ struct exec_list functions;
++
++ /* Current function parameter index */
++ unsigned func_param_idx;
++
++ bool has_loop_continue;
++};
++
++static inline struct vtn_value *
++vtn_push_value(struct vtn_builder *b, uint32_t value_id,
++ enum vtn_value_type value_type)
++{
++ assert(value_id < b->value_id_bound);
++ assert(b->values[value_id].value_type == vtn_value_type_invalid);
++
++ b->values[value_id].value_type = value_type;
++
++ return &b->values[value_id];
++}
++
++static inline struct vtn_value *
++vtn_untyped_value(struct vtn_builder *b, uint32_t value_id)
++{
++ assert(value_id < b->value_id_bound);
++ return &b->values[value_id];
++}
++
++static inline struct vtn_value *
++vtn_value(struct vtn_builder *b, uint32_t value_id,
++ enum vtn_value_type value_type)
++{
++ struct vtn_value *val = vtn_untyped_value(b, value_id);
++ assert(val->value_type == value_type);
++ return val;
++}
++
++struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);
++
++struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b,
++ const struct glsl_type *type);
++
++struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b,
++ struct vtn_ssa_value *src);
++
++nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src,
++ unsigned index);
++nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *index);
++nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *insert, unsigned index);
++nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
++ nir_ssa_def *insert, nir_ssa_def *index);
++
++nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id);
++
++nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b,
++ struct vtn_access_chain *chain);
++nir_ssa_def *
++vtn_access_chain_to_offset(struct vtn_builder *b,
++ struct vtn_access_chain *chain,
++ nir_ssa_def **index_out, struct vtn_type **type_out,
++ unsigned *end_idx_out, bool stop_at_matrix);
++
++struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src);
++
++void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ nir_deref_var *dest);
++
++struct vtn_ssa_value *
++vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src);
++
++void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_access_chain *dest);
++
++void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count);
++
++
++typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *,
++ struct vtn_value *,
++ int member,
++ const struct vtn_decoration *,
++ void *);
++
++void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
++ vtn_decoration_foreach_cb cb, void *data);
++
++typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *,
++ struct vtn_value *,
++ const struct vtn_decoration *,
++ void *);
++
++void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
++ vtn_execution_mode_foreach_cb cb, void *data);
++
++nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap);
++
++void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count);
++
++bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
++ const uint32_t *words, unsigned count);
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++
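++ /* Returns a copy of "old" with room for "new_ids" additional links at the
++ * end; the caller is expected to fill in the new links.
++ */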
++static struct vtn_access_chain *
++vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old,
++ unsigned new_ids)
++{
++ struct vtn_access_chain *chain;
++
++ unsigned new_len = old->length + new_ids;
++ chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0]));
++
++ chain->var = old->var;
++ chain->length = new_len;
++
++ for (unsigned i = 0; i < old->length; i++)
++ chain->link[i] = old->link[i];
++
++ return chain;
++}
++
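++ /* Converts one access-chain link into an SSA offset scaled by "stride":
++ * literal links become immediate constants, id links are loaded and
++ * multiplied by the stride (the multiply is skipped when stride == 1).
++ */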
++static nir_ssa_def *
++vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
++ unsigned stride)
++{
++ assert(stride > 0);
++ if (link.mode == vtn_access_mode_literal) {
++ return nir_imm_int(&b->nb, link.id * stride);
++ } else if (stride == 1) {
++ return vtn_ssa_value(b, link.id)->def;
++ } else {
++ return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def,
++ nir_imm_int(&b->nb, stride));
++ }
++}
++
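++ /* Walks an access chain and returns the vtn_type of the value it finally
++ * selects.
++ */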
++static struct vtn_type *
++vtn_access_chain_tail_type(struct vtn_builder *b,
++ struct vtn_access_chain *chain)
++{
++ struct vtn_type *type = chain->var->type;
++ for (unsigned i = 0; i < chain->length; i++) {
++ if (glsl_type_is_struct(type->type)) {
++ assert(chain->link[i].mode == vtn_access_mode_literal);
++ type = type->members[chain->link[i].id];
++ } else {
++ type = type->array_element;
++ }
++ }
++ return type;
++}
++
++/* Crawls a chain of array derefs and rewrites the types so that the
++ * lengths stay the same but the terminal type is the one given by
++ * tail_type. This is useful for split structures.
++ */
++static void
++rewrite_deref_types(nir_deref *deref, const struct glsl_type *type)
++{
++ deref->type = type;
++ if (deref->child) {
++ assert(deref->child->deref_type == nir_deref_type_array);
++ assert(glsl_type_is_array(deref->type));
++ rewrite_deref_types(deref->child, glsl_get_array_element(type));
++ }
++}
++
++nir_deref_var *
++vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
++{
++ nir_deref_var *deref_var;
++ if (chain->var->var) {
++ deref_var = nir_deref_var_create(b, chain->var->var);
++ } else {
++ assert(chain->var->members);
++ /* Create the deref_var manually. It will get filled out later. */
++ deref_var = rzalloc(b, nir_deref_var);
++ deref_var->deref.deref_type = nir_deref_type_var;
++ }
++
++ struct vtn_type *deref_type = chain->var->type;
++ nir_deref *tail = &deref_var->deref;
++ nir_variable **members = chain->var->members;
++
++ for (unsigned i = 0; i < chain->length; i++) {
++ enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_ARRAY: {
++ deref_type = deref_type->array_element;
++
++ nir_deref_array *deref_arr = nir_deref_array_create(b);
++ deref_arr->deref.type = deref_type->type;
++
++ if (chain->link[i].mode == vtn_access_mode_literal) {
++ deref_arr->deref_array_type = nir_deref_array_type_direct;
++ deref_arr->base_offset = chain->link[i].id;
++ } else {
++ assert(chain->link[i].mode == vtn_access_mode_id);
++ deref_arr->deref_array_type = nir_deref_array_type_indirect;
++ deref_arr->base_offset = 0;
++ deref_arr->indirect =
++ nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def);
++ }
++ tail->child = &deref_arr->deref;
++ tail = tail->child;
++ break;
++ }
++
++ case GLSL_TYPE_STRUCT: {
++ assert(chain->link[i].mode == vtn_access_mode_literal);
++ unsigned idx = chain->link[i].id;
++ deref_type = deref_type->members[idx];
++ if (members) {
++ /* This is a pre-split structure. */
++ deref_var->var = members[idx];
++ rewrite_deref_types(&deref_var->deref, members[idx]->type);
++ assert(tail->type == deref_type->type);
++ members = NULL;
++ } else {
++ nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx);
++ deref_struct->deref.type = deref_type->type;
++ tail->child = &deref_struct->deref;
++ tail = tail->child;
++ }
++ break;
++ }
++ default:
++ unreachable("Invalid type for deref");
++ }
++ }
++
++ assert(members == NULL);
++ return deref_var;
++}
++
++static void
++_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
++ nir_deref *tail, struct vtn_ssa_value *inout)
++{
++ /* The deref tail may contain a deref to select a component of a vector (in
++ * other words, it might not be an actual tail) so we have to save it away
++ * here since we overwrite it later.
++ */
++ nir_deref *old_child = tail->child;
++
++ if (glsl_type_is_vector_or_scalar(tail->type)) {
++ /* Terminate the deref chain in case there is one more link to pick
++ * off a component of the vector.
++ */
++ tail->child = NULL;
++
++ nir_intrinsic_op op = load ? nir_intrinsic_load_var :
++ nir_intrinsic_store_var;
++
++ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
++ intrin->variables[0] =
++ nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
++ intrin->num_components = glsl_get_vector_elements(tail->type);
++
++ if (load) {
++ nir_ssa_dest_init(&intrin->instr, &intrin->dest,
++ intrin->num_components, NULL);
++ inout->def = &intrin->dest.ssa;
++ } else {
++ intrin->const_index[0] = (1 << intrin->num_components) - 1;
++ intrin->src[0] = nir_src_for_ssa(inout->def);
++ }
++
++ nir_builder_instr_insert(&b->nb, &intrin->instr);
++ } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
++ glsl_type_is_matrix(tail->type)) {
++ unsigned elems = glsl_get_length(tail->type);
++ nir_deref_array *deref_arr = nir_deref_array_create(b);
++ deref_arr->deref_array_type = nir_deref_array_type_direct;
++ deref_arr->deref.type = glsl_get_array_element(tail->type);
++ tail->child = &deref_arr->deref;
++ for (unsigned i = 0; i < elems; i++) {
++ deref_arr->base_offset = i;
++ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
++ }
++ } else {
++ assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
++ unsigned elems = glsl_get_length(tail->type);
++ nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
++ tail->child = &deref_struct->deref;
++ for (unsigned i = 0; i < elems; i++) {
++ deref_struct->index = i;
++ deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
++ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
++ }
++ }
++
++ tail->child = old_child;
++}
++
++nir_deref_var *
++vtn_nir_deref(struct vtn_builder *b, uint32_t id)
++{
++ struct vtn_access_chain *chain =
++ vtn_value(b, id, vtn_value_type_access_chain)->access_chain;
++
++ return vtn_access_chain_to_deref(b, chain);
++}
++
++/*
++ * Gets the NIR-level deref tail, which may have as a child an array deref
++ * selecting which component due to OpAccessChain supporting per-component
++ * indexing in SPIR-V.
++ */
++static nir_deref *
++get_deref_tail(nir_deref_var *deref)
++{
++ nir_deref *cur = &deref->deref;
++ while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child)
++ cur = cur->child;
++
++ return cur;
++}
++
++struct vtn_ssa_value *
++vtn_local_load(struct vtn_builder *b, nir_deref_var *src)
++{
++ nir_deref *src_tail = get_deref_tail(src);
++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type);
++ _vtn_local_load_store(b, true, src, src_tail, val);
++
++ if (src_tail->child) {
++ nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child);
++ assert(vec_deref->deref.child == NULL);
++ val->type = vec_deref->deref.type;
++ if (vec_deref->deref_array_type == nir_deref_array_type_direct)
++ val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset);
++ else
++ val->def = vtn_vector_extract_dynamic(b, val->def,
++ vec_deref->indirect.ssa);
++ }
++
++ return val;
++}
++
++void
++vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ nir_deref_var *dest)
++{
++ nir_deref *dest_tail = get_deref_tail(dest);
++
++ if (dest_tail->child) {
++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
++ _vtn_local_load_store(b, true, dest, dest_tail, val);
++ nir_deref_array *deref = nir_deref_as_array(dest_tail->child);
++ assert(deref->deref.child == NULL);
++ if (deref->deref_array_type == nir_deref_array_type_direct)
++ val->def = vtn_vector_insert(b, val->def, src->def,
++ deref->base_offset);
++ else
++ val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
++ deref->indirect.ssa);
++ _vtn_local_load_store(b, false, dest, dest_tail, val);
++ } else {
++ _vtn_local_load_store(b, false, dest, dest_tail, src);
++ }
++}
++
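++ /* Emits a vulkan_resource_index intrinsic for the variable's descriptor
++ * set and binding (consuming a leading array index for arrayed resources).
++ * Push constants have no binding, so NULL is returned for them.
++ */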
++static nir_ssa_def *
++get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
++ struct vtn_type **type, unsigned *chain_idx)
++{
++ /* Push constants have no explicit binding */
++ if (chain->var->mode == vtn_variable_mode_push_constant) {
++ *chain_idx = 0;
++ *type = chain->var->type;
++ return NULL;
++ }
++
++ nir_ssa_def *array_index;
++ if (glsl_type_is_array(chain->var->type->type)) {
++ assert(chain->length > 0);
++ array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
++ *chain_idx = 1;
++ *type = chain->var->type->array_element;
++ } else {
++ array_index = nir_imm_int(&b->nb, 0);
++ *chain_idx = 0;
++ *type = chain->var->type;
++ }
++
++ nir_intrinsic_instr *instr =
++ nir_intrinsic_instr_create(b->nb.shader,
++ nir_intrinsic_vulkan_resource_index);
++ instr->src[0] = nir_src_for_ssa(array_index);
++ instr->const_index[0] = chain->var->descriptor_set;
++ instr->const_index[1] = chain->var->binding;
++
++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++
++ return &instr->dest.ssa;
++}
++
++nir_ssa_def *
++vtn_access_chain_to_offset(struct vtn_builder *b,
++ struct vtn_access_chain *chain,
++ nir_ssa_def **index_out, struct vtn_type **type_out,
++ unsigned *end_idx_out, bool stop_at_matrix)
++{
++ unsigned idx = 0;
++ struct vtn_type *type;
++ *index_out = get_vulkan_resource_index(b, chain, &type, &idx);
++
++ nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
++ for (; idx < chain->length; idx++) {
++ enum glsl_base_type base_type = glsl_get_base_type(type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_DOUBLE:
++ case GLSL_TYPE_BOOL:
++ /* Some users may not want matrix or vector derefs */
++ if (stop_at_matrix)
++ goto end;
++ /* Fall through */
++
++ case GLSL_TYPE_ARRAY:
++ offset = nir_iadd(&b->nb, offset,
++ vtn_access_link_as_ssa(b, chain->link[idx],
++ type->stride));
++
++ type = type->array_element;
++ break;
++
++ case GLSL_TYPE_STRUCT: {
++ assert(chain->link[idx].mode == vtn_access_mode_literal);
++ unsigned member = chain->link[idx].id;
++ offset = nir_iadd(&b->nb, offset,
++ nir_imm_int(&b->nb, type->offsets[member]));
++ type = type->members[member];
++ break;
++ }
++
++ default:
++ unreachable("Invalid type for deref");
++ }
++ }
++
++end:
++ *type_out = type;
++ if (end_idx_out)
++ *end_idx_out = idx;
++
++ return offset;
++}
++
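++ /* Emits a single load/store intrinsic for one scalar or vector worth of
++ * data at the given index/offset. Loaded booleans are converted to NIR
++ * booleans (nonzero -> true) by the ine against zero below.
++ */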
++static void
++_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
++ nir_ssa_def *index, nir_ssa_def *offset,
++ struct vtn_ssa_value **inout, const struct glsl_type *type)
++{
++ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
++ instr->num_components = glsl_get_vector_elements(type);
++
++ int src = 0;
++ if (!load) {
++ instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */
++ instr->src[src++] = nir_src_for_ssa((*inout)->def);
++ }
++
++ /* We set the base and size for push constant load to the entire push
++ * constant block for now.
++ */
++ if (op == nir_intrinsic_load_push_constant) {
++ instr->const_index[0] = 0;
++ instr->const_index[1] = 128;
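++ /* 128 bytes is the minimum maxPushConstantsSize guaranteed by Vulkan. */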
++ }
++
++ if (index)
++ instr->src[src++] = nir_src_for_ssa(index);
++
++ instr->src[src++] = nir_src_for_ssa(offset);
++
++ if (load) {
++ nir_ssa_dest_init(&instr->instr, &instr->dest,
++ instr->num_components, NULL);
++ (*inout)->def = &instr->dest.ssa;
++ }
++
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++
++ if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
++ (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
++}
++
++static void
++_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
++ nir_ssa_def *index, nir_ssa_def *offset,
++ struct vtn_access_chain *chain, unsigned chain_idx,
++ struct vtn_type *type, struct vtn_ssa_value **inout)
++{
++ if (chain && chain_idx >= chain->length)
++ chain = NULL;
++
++ if (load && chain == NULL && *inout == NULL)
++ *inout = vtn_create_ssa_value(b, type->type);
++
++ enum glsl_base_type base_type = glsl_get_base_type(type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* This is where things get interesting. At this point, we've hit
++ * a vector, a scalar, or a matrix.
++ */
++ if (glsl_type_is_matrix(type->type)) {
++ if (chain == NULL) {
++ /* Loading the whole matrix */
++ struct vtn_ssa_value *transpose;
++ unsigned num_ops, vec_width;
++ if (type->row_major) {
++ num_ops = glsl_get_vector_elements(type->type);
++ vec_width = glsl_get_matrix_columns(type->type);
++ if (load) {
++ const struct glsl_type *transpose_type =
++ glsl_matrix_type(base_type, vec_width, num_ops);
++ *inout = vtn_create_ssa_value(b, transpose_type);
++ } else {
++ transpose = vtn_ssa_transpose(b, *inout);
++ inout = &transpose;
++ }
++ } else {
++ num_ops = glsl_get_matrix_columns(type->type);
++ vec_width = glsl_get_vector_elements(type->type);
++ }
++
++ for (unsigned i = 0; i < num_ops; i++) {
++ nir_ssa_def *elem_offset =
++ nir_iadd(&b->nb, offset,
++ nir_imm_int(&b->nb, i * type->stride));
++ _vtn_load_store_tail(b, op, load, index, elem_offset,
++ &(*inout)->elems[i],
++ glsl_vector_type(base_type, vec_width));
++ }
++
++ if (load && type->row_major)
++ *inout = vtn_ssa_transpose(b, *inout);
++ } else if (type->row_major) {
++ /* Row-major but with an access chain. */
++ nir_ssa_def *col_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx],
++ type->array_element->stride);
++ offset = nir_iadd(&b->nb, offset, col_offset);
++
++ if (chain_idx + 1 < chain->length) {
++ /* Picking off a single element */
++ nir_ssa_def *row_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
++ type->stride);
++ offset = nir_iadd(&b->nb, offset, row_offset);
++ if (load)
++ *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
++ _vtn_load_store_tail(b, op, load, index, offset, inout,
++ glsl_scalar_type(base_type));
++ } else {
++ /* Grabbing a column; picking one element off each row */
++ unsigned num_comps = glsl_get_vector_elements(type->type);
++ const struct glsl_type *column_type =
++ glsl_get_column_type(type->type);
++
++ nir_ssa_def *comps[4];
++ for (unsigned i = 0; i < num_comps; i++) {
++ nir_ssa_def *elem_offset =
++ nir_iadd(&b->nb, offset,
++ nir_imm_int(&b->nb, i * type->stride));
++
++ struct vtn_ssa_value *comp, temp_val;
++ if (!load) {
++ temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
++ temp_val.type = glsl_scalar_type(base_type);
++ }
++ comp = &temp_val;
++ _vtn_load_store_tail(b, op, load, index, elem_offset,
++ &comp, glsl_scalar_type(base_type));
++ comps[i] = comp->def;
++ }
++
++ if (load) {
++ if (*inout == NULL)
++ *inout = vtn_create_ssa_value(b, column_type);
++
++ (*inout)->def = nir_vec(&b->nb, comps, num_comps);
++ }
++ }
++ } else {
++ /* Column-major with a deref. Fall through to array case. */
++ nir_ssa_def *col_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
++ offset = nir_iadd(&b->nb, offset, col_offset);
++
++ _vtn_block_load_store(b, op, load, index, offset,
++ chain, chain_idx + 1,
++ type->array_element, inout);
++ }
++ } else if (chain == NULL) {
++ /* Single whole vector */
++ assert(glsl_type_is_vector_or_scalar(type->type));
++ _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
++ } else {
++ /* Single component of a vector. Fall through to array case. */
++ nir_ssa_def *elem_offset =
++ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
++ offset = nir_iadd(&b->nb, offset, elem_offset);
++
++ _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
++ type->array_element, inout);
++ }
++ return;
++
++ case GLSL_TYPE_ARRAY: {
++ unsigned elems = glsl_get_length(type->type);
++ for (unsigned i = 0; i < elems; i++) {
++ nir_ssa_def *elem_off =
++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
++ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
++ type->array_element, &(*inout)->elems[i]);
++ }
++ return;
++ }
++
++ case GLSL_TYPE_STRUCT: {
++ unsigned elems = glsl_get_length(type->type);
++ for (unsigned i = 0; i < elems; i++) {
++ nir_ssa_def *elem_off =
++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
++ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
++ type->members[i], &(*inout)->elems[i]);
++ }
++ return;
++ }
++
++ default:
++ unreachable("Invalid block member type");
++ }
++}
++
++static struct vtn_ssa_value *
++vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src)
++{
++ nir_intrinsic_op op;
++ switch (src->var->mode) {
++ case vtn_variable_mode_ubo:
++ op = nir_intrinsic_load_ubo;
++ break;
++ case vtn_variable_mode_ssbo:
++ op = nir_intrinsic_load_ssbo;
++ break;
++ case vtn_variable_mode_push_constant:
++ op = nir_intrinsic_load_push_constant;
++ break;
++ default:
++ assert(!"Invalid block variable mode");
++ }
++
++ nir_ssa_def *offset, *index = NULL;
++ struct vtn_type *type;
++ unsigned chain_idx;
++ offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true);
++
++ struct vtn_ssa_value *value = NULL;
++ _vtn_block_load_store(b, op, true, index, offset,
++ src, chain_idx, type, &value);
++ return value;
++}
++
++static void
++vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_access_chain *dst)
++{
++ nir_ssa_def *offset, *index = NULL;
++ struct vtn_type *type;
++ unsigned chain_idx;
++ offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true);
++
++ _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset,
++ dst, chain_idx, type, &src);
++}
++
++static bool
++vtn_variable_is_external_block(struct vtn_variable *var)
++{
++ return var->mode == vtn_variable_mode_ssbo ||
++ var->mode == vtn_variable_mode_ubo ||
++ var->mode == vtn_variable_mode_push_constant;
++}
++
++static void
++_vtn_variable_load_store(struct vtn_builder *b, bool load,
++ struct vtn_access_chain *chain,
++ struct vtn_type *tail_type,
++ struct vtn_ssa_value **inout)
++{
++ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* At this point, we have a scalar, vector, or matrix so we know that
++ * there cannot be any structure splitting still in the way. By
++ * stopping at the matrix level rather than the vector level, we
++ * ensure that matrices get loaded in the optimal way even if they
++ * are stored row-major in a UBO.
++ */
++ if (load) {
++ *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain));
++ } else {
++ vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain));
++ }
++ return;
++
++ case GLSL_TYPE_ARRAY:
++ case GLSL_TYPE_STRUCT: {
++ struct vtn_access_chain *new_chain =
++ vtn_access_chain_extend(b, chain, 1);
++ new_chain->link[chain->length].mode = vtn_access_mode_literal;
++ unsigned elems = glsl_get_length(tail_type->type);
++ if (load) {
++ assert(*inout == NULL);
++ *inout = rzalloc(b, struct vtn_ssa_value);
++ (*inout)->type = tail_type->type;
++ (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems);
++ }
++ for (unsigned i = 0; i < elems; i++) {
++ new_chain->link[chain->length].id = i;
++ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
++ tail_type->array_element : tail_type->members[i];
++ _vtn_variable_load_store(b, load, new_chain, elem_type,
++ &(*inout)->elems[i]);
++ }
++ return;
++ }
++
++ default:
++ unreachable("Invalid access chain type");
++ }
++}
++
++struct vtn_ssa_value *
++vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src)
++{
++ if (vtn_variable_is_external_block(src->var)) {
++ return vtn_block_load(b, src);
++ } else {
++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
++ struct vtn_ssa_value *val = NULL;
++ _vtn_variable_load_store(b, true, src, tail_type, &val);
++ return val;
++ }
++}
++
++void
++vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
++ struct vtn_access_chain *dest)
++{
++ if (vtn_variable_is_external_block(dest->var)) {
++ assert(dest->var->mode == vtn_variable_mode_ssbo);
++ vtn_block_store(b, src, dest);
++ } else {
++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest);
++ _vtn_variable_load_store(b, false, dest, tail_type, &src);
++ }
++}
++
++static void
++_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
++ struct vtn_access_chain *src, struct vtn_type *tail_type)
++{
++ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ /* At this point, we have a scalar, vector, or matrix so we know that
++ * there cannot be any structure splitting still in the way. By
++ * stopping at the matrix level rather than the vector level, we
++ * ensure that matrices get loaded in the optimal way even if they
++ * are stored row-major in a UBO.
++ */
++ vtn_variable_store(b, vtn_variable_load(b, src), dest);
++ return;
++
++ case GLSL_TYPE_ARRAY:
++ case GLSL_TYPE_STRUCT: {
++ struct vtn_access_chain *new_src, *new_dest;
++ new_src = vtn_access_chain_extend(b, src, 1);
++ new_dest = vtn_access_chain_extend(b, dest, 1);
++ new_src->link[src->length].mode = vtn_access_mode_literal;
++ new_dest->link[dest->length].mode = vtn_access_mode_literal;
++ unsigned elems = glsl_get_length(tail_type->type);
++ for (unsigned i = 0; i < elems; i++) {
++ new_src->link[src->length].id = i;
++ new_dest->link[dest->length].id = i;
++ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
++ tail_type->array_element : tail_type->members[i];
++ _vtn_variable_copy(b, new_dest, new_src, elem_type);
++ }
++ return;
++ }
++
++ default:
++ unreachable("Invalid access chain type");
++ }
++}
++
++static void
++vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
++ struct vtn_access_chain *src)
++{
++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
++ assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type);
++
++ /* TODO: At some point, we should add a special-case for when we can
++ * just emit a copy_var intrinsic.
++ */
++ _vtn_variable_copy(b, dest, src, tail_type);
++}
++
++static void
++set_mode_system_value(nir_variable_mode *mode)
++{
++ assert(*mode == nir_var_system_value || *mode == nir_var_shader_in);
++ *mode = nir_var_system_value;
++}
++
++static void
++vtn_get_builtin_location(struct vtn_builder *b,
++ SpvBuiltIn builtin, int *location,
++ nir_variable_mode *mode)
++{
++ switch (builtin) {
++ case SpvBuiltInPosition:
++ *location = VARYING_SLOT_POS;
++ break;
++ case SpvBuiltInPointSize:
++ *location = VARYING_SLOT_PSIZ;
++ break;
++ case SpvBuiltInClipDistance:
++ *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */
++ break;
++ case SpvBuiltInCullDistance:
++ /* XXX figure this out */
++ break;
++ case SpvBuiltInVertexIndex:
++ *location = SYSTEM_VALUE_VERTEX_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInVertexId:
++ /* Vulkan defines VertexID to be zero-based and reserves the new
++ * builtin keyword VertexIndex to indicate the non-zero-based value.
++ */
++ *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInInstanceIndex:
++ *location = SYSTEM_VALUE_INSTANCE_INDEX;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInInstanceId:
++ *location = SYSTEM_VALUE_INSTANCE_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInPrimitiveId:
++ *location = VARYING_SLOT_PRIMITIVE_ID;
++ *mode = nir_var_shader_out;
++ break;
++ case SpvBuiltInInvocationId:
++ *location = SYSTEM_VALUE_INVOCATION_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInLayer:
++ *location = VARYING_SLOT_LAYER;
++ *mode = nir_var_shader_out;
++ break;
++ case SpvBuiltInViewportIndex:
++ *location = VARYING_SLOT_VIEWPORT;
++ if (b->shader->stage == MESA_SHADER_GEOMETRY)
++ *mode = nir_var_shader_out;
++ else if (b->shader->stage == MESA_SHADER_FRAGMENT)
++ *mode = nir_var_shader_in;
++ else
++ unreachable("invalid stage for SpvBuiltInViewportIndex");
++ break;
++ case SpvBuiltInTessLevelOuter:
++ case SpvBuiltInTessLevelInner:
++ case SpvBuiltInTessCoord:
++ case SpvBuiltInPatchVertices:
++ unreachable("no tessellation support");
++ case SpvBuiltInFragCoord:
++ *location = VARYING_SLOT_POS;
++ assert(*mode == nir_var_shader_in);
++ break;
++ case SpvBuiltInPointCoord:
++ *location = VARYING_SLOT_PNTC;
++ assert(*mode == nir_var_shader_in);
++ break;
++ case SpvBuiltInFrontFacing:
++ *location = VARYING_SLOT_FACE;
++ assert(*mode == nir_var_shader_in);
++ break;
++ case SpvBuiltInSampleId:
++ *location = SYSTEM_VALUE_SAMPLE_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInSamplePosition:
++ *location = SYSTEM_VALUE_SAMPLE_POS;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInSampleMask:
++ *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInFragDepth:
++ *location = FRAG_RESULT_DEPTH;
++ assert(*mode == nir_var_shader_out);
++ break;
++ case SpvBuiltInNumWorkgroups:
++ *location = SYSTEM_VALUE_NUM_WORK_GROUPS;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInWorkgroupSize:
++ /* This should already be handled */
++ unreachable("unsupported builtin");
++ break;
++ case SpvBuiltInWorkgroupId:
++ *location = SYSTEM_VALUE_WORK_GROUP_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInLocalInvocationId:
++ *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInLocalInvocationIndex:
++ *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInGlobalInvocationId:
++ *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
++ set_mode_system_value(mode);
++ break;
++ case SpvBuiltInHelperInvocation:
++ default:
++ unreachable("unsupported builtin");
++ }
++}
++
++static void
++var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
++ const struct vtn_decoration *dec, void *void_var)
++{
++ struct vtn_variable *vtn_var = void_var;
++
++ /* Handle decorations that apply to a vtn_variable as a whole */
++ switch (dec->decoration) {
++ case SpvDecorationBinding:
++ vtn_var->binding = dec->literals[0];
++ return;
++ case SpvDecorationDescriptorSet:
++ vtn_var->descriptor_set = dec->literals[0];
++ return;
++
++ case SpvDecorationLocation: {
++ unsigned location = dec->literals[0];
++ bool is_vertex_input;
++ if (b->shader->stage == MESA_SHADER_FRAGMENT &&
++ vtn_var->mode == vtn_variable_mode_output) {
++ is_vertex_input = false;
++ location += FRAG_RESULT_DATA0;
++ } else if (b->shader->stage == MESA_SHADER_VERTEX &&
++ vtn_var->mode == vtn_variable_mode_input) {
++ is_vertex_input = true;
++ location += VERT_ATTRIB_GENERIC0;
++ } else if (vtn_var->mode == vtn_variable_mode_input ||
++ vtn_var->mode == vtn_variable_mode_output) {
++ is_vertex_input = false;
++ location += VARYING_SLOT_VAR0;
++ } else {
++ assert(!"Location must be on input or output variable");
++ }
++
++ if (vtn_var->var) {
++ vtn_var->var->data.location = location;
++ vtn_var->var->data.explicit_location = true;
++ } else {
++ assert(vtn_var->members);
++ unsigned length = glsl_get_length(vtn_var->type->type);
++ for (unsigned i = 0; i < length; i++) {
++ vtn_var->members[i]->data.location = location;
++ vtn_var->members[i]->data.explicit_location = true;
++ location +=
++ glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
++ is_vertex_input);
++ }
++ }
++ return;
++ }
++
++ default:
++ break;
++ }
++
++ /* Now we handle decorations that apply to a particular nir_variable */
++ nir_variable *nir_var = vtn_var->var;
++ if (val->value_type == vtn_value_type_access_chain) {
++ assert(val->access_chain->length == 0);
++ assert(val->access_chain->var == void_var);
++ assert(member == -1);
++ } else {
++ assert(val->value_type == vtn_value_type_type);
++ if (member != -1)
++ nir_var = vtn_var->members[member];
++ }
++
++ if (nir_var == NULL)
++ return;
++
++ switch (dec->decoration) {
++ case SpvDecorationRelaxedPrecision:
++ break; /* FIXME: Do nothing with this for now. */
++ case SpvDecorationNoPerspective:
++ nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
++ break;
++ case SpvDecorationFlat:
++ nir_var->data.interpolation = INTERP_QUALIFIER_FLAT;
++ break;
++ case SpvDecorationCentroid:
++ nir_var->data.centroid = true;
++ break;
++ case SpvDecorationSample:
++ nir_var->data.sample = true;
++ break;
++ case SpvDecorationInvariant:
++ nir_var->data.invariant = true;
++ break;
++ case SpvDecorationConstant:
++ assert(nir_var->constant_initializer != NULL);
++ nir_var->data.read_only = true;
++ break;
++ case SpvDecorationNonWritable:
++ nir_var->data.read_only = true;
++ break;
++ case SpvDecorationComponent:
++ nir_var->data.location_frac = dec->literals[0];
++ break;
++ case SpvDecorationIndex:
++ nir_var->data.explicit_index = true;
++ nir_var->data.index = dec->literals[0];
++ break;
++ case SpvDecorationBuiltIn: {
++ SpvBuiltIn builtin = dec->literals[0];
++
++ if (builtin == SpvBuiltInWorkgroupSize) {
++ /* This shouldn't be a builtin. It's actually a constant. */
++ nir_var->data.mode = nir_var_global;
++ nir_var->data.read_only = true;
++
++ nir_constant *c = rzalloc(nir_var, nir_constant);
++ c->value.u[0] = b->shader->info.cs.local_size[0];
++ c->value.u[1] = b->shader->info.cs.local_size[1];
++ c->value.u[2] = b->shader->info.cs.local_size[2];
++ nir_var->constant_initializer = c;
++ break;
++ }
++
++ nir_variable_mode mode = nir_var->data.mode;
++ vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode);
++ nir_var->data.explicit_location = true;
++ nir_var->data.mode = mode;
++
++ if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition)
++ nir_var->data.origin_upper_left = b->origin_upper_left;
++ break;
++ }
++ case SpvDecorationRowMajor:
++ case SpvDecorationColMajor:
++ case SpvDecorationGLSLShared:
++ case SpvDecorationPatch:
++ case SpvDecorationRestrict:
++ case SpvDecorationAliased:
++ case SpvDecorationVolatile:
++ case SpvDecorationCoherent:
++ case SpvDecorationNonReadable:
++ case SpvDecorationUniform:
++ /* This is really nice but we have no use for it right now. */
++ case SpvDecorationCPacked:
++ case SpvDecorationSaturatedConversion:
++ case SpvDecorationStream:
++ case SpvDecorationOffset:
++ case SpvDecorationXfbBuffer:
++ case SpvDecorationFuncParamAttr:
++ case SpvDecorationFPRoundingMode:
++ case SpvDecorationFPFastMathMode:
++ case SpvDecorationLinkageAttributes:
++ case SpvDecorationSpecId:
++ break;
++ default:
++ unreachable("Unhandled variable decoration");
++ }
++}
++
++/* Tries to compute the size of an interface block based on the strides and
++ * offsets that are provided to us in the SPIR-V source.
++ */
++static unsigned
++vtn_type_block_size(struct vtn_type *type)
++{
++ enum glsl_base_type base_type = glsl_get_base_type(type->type);
++ switch (base_type) {
++ case GLSL_TYPE_UINT:
++ case GLSL_TYPE_INT:
++ case GLSL_TYPE_FLOAT:
++ case GLSL_TYPE_BOOL:
++ case GLSL_TYPE_DOUBLE: {
++ unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
++ glsl_get_matrix_columns(type->type);
++ if (cols > 1) {
++ assert(type->stride > 0);
++ return type->stride * cols;
++ } else if (base_type == GLSL_TYPE_DOUBLE) {
++ return glsl_get_vector_elements(type->type) * 8;
++ } else {
++ return glsl_get_vector_elements(type->type) * 4;
++ }
++ }
++
++ case GLSL_TYPE_STRUCT:
++ case GLSL_TYPE_INTERFACE: {
++ unsigned size = 0;
++ unsigned num_fields = glsl_get_length(type->type);
++ for (unsigned f = 0; f < num_fields; f++) {
++ unsigned field_end = type->offsets[f] +
++ vtn_type_block_size(type->members[f]);
++ size = MAX2(size, field_end);
++ }
++ return size;
++ }
++
++ case GLSL_TYPE_ARRAY:
++ assert(type->stride > 0);
++ assert(glsl_get_length(type->type) > 0);
++ return type->stride * glsl_get_length(type->type);
++
++ default:
++ assert(!"Invalid block type");
++ return 0;
++ }
++}
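++
++/* A rough worked example for the helper above (layout values are
++ * hypothetical, not taken from any real module): a block containing
++ * "vec4 a; float b[3];" with member offsets {0, 16} and an array stride
++ * of 16 for b gives field ends of 0 + 16 = 16 and 16 + 3 * 16 = 64, so
++ * vtn_type_block_size() reports 64 bytes.
++ */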
++
++void
++vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
++ const uint32_t *w, unsigned count)
++{
++ switch (opcode) {
++ case SpvOpVariable: {
++ struct vtn_variable *var = rzalloc(b, struct vtn_variable);
++ var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
++
++ var->chain.var = var;
++ var->chain.length = 0;
++
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++ val->access_chain = &var->chain;
++
++ struct vtn_type *without_array = var->type;
++ while(glsl_type_is_array(without_array->type))
++ without_array = without_array->array_element;
++
++ nir_variable_mode nir_mode;
++ switch ((SpvStorageClass)w[3]) {
++ case SpvStorageClassUniform:
++ case SpvStorageClassUniformConstant:
++ if (without_array->block) {
++ var->mode = vtn_variable_mode_ubo;
++ b->shader->info.num_ubos++;
++ } else if (without_array->buffer_block) {
++ var->mode = vtn_variable_mode_ssbo;
++ b->shader->info.num_ssbos++;
++ } else if (glsl_type_is_image(without_array->type)) {
++ var->mode = vtn_variable_mode_image;
++ nir_mode = nir_var_uniform;
++ b->shader->info.num_images++;
++ } else if (glsl_type_is_sampler(without_array->type)) {
++ var->mode = vtn_variable_mode_sampler;
++ nir_mode = nir_var_uniform;
++ b->shader->info.num_textures++;
++ } else {
++ assert(!"Invalid uniform variable type");
++ }
++ break;
++ case SpvStorageClassPushConstant:
++ var->mode = vtn_variable_mode_push_constant;
++ assert(b->shader->num_uniforms == 0);
++ b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
++ break;
++ case SpvStorageClassInput:
++ var->mode = vtn_variable_mode_input;
++ nir_mode = nir_var_shader_in;
++ break;
++ case SpvStorageClassOutput:
++ var->mode = vtn_variable_mode_output;
++ nir_mode = nir_var_shader_out;
++ break;
++ case SpvStorageClassPrivate:
++ var->mode = vtn_variable_mode_global;
++ nir_mode = nir_var_global;
++ break;
++ case SpvStorageClassFunction:
++ var->mode = vtn_variable_mode_local;
++ nir_mode = nir_var_local;
++ break;
++ case SpvStorageClassWorkgroup:
++ var->mode = vtn_variable_mode_workgroup;
++ nir_mode = nir_var_shared;
++ break;
++ case SpvStorageClassCrossWorkgroup:
++ case SpvStorageClassGeneric:
++ case SpvStorageClassAtomicCounter:
++ default:
++ unreachable("Unhandled variable storage class");
++ }
++
++ switch (var->mode) {
++ case vtn_variable_mode_local:
++ case vtn_variable_mode_global:
++ case vtn_variable_mode_image:
++ case vtn_variable_mode_sampler:
++ case vtn_variable_mode_workgroup:
++ /* For these, we create the variable normally */
++ var->var = rzalloc(b->shader, nir_variable);
++ var->var->name = ralloc_strdup(var->var, val->name);
++ var->var->type = var->type->type;
++ var->var->data.mode = nir_mode;
++
++ switch (var->mode) {
++ case vtn_variable_mode_image:
++ case vtn_variable_mode_sampler:
++ var->var->interface_type = without_array->type;
++ break;
++ default:
++ var->var->interface_type = NULL;
++ break;
++ }
++ break;
++
++ case vtn_variable_mode_input:
++ case vtn_variable_mode_output: {
++ /* For inputs and outputs, we immediately split structures. This
++ * is for a couple of reasons. For one, builtins may all come in
++ * a struct and we really want those split out into separate
++ * variables. For another, interpolation qualifiers can be
++ * applied to members of the top-level struct and we need to be
++ * able to preserve that information.
++ */
++
++ int array_length = -1;
++ struct vtn_type *interface_type = var->type;
++ if (b->shader->stage == MESA_SHADER_GEOMETRY &&
++ glsl_type_is_array(var->type->type)) {
++ /* In Geometry shaders (and some tessellation), inputs come
++ * in per-vertex arrays. However, some builtins come in
++ * non-per-vertex, hence the need for the is_array check. In
++ * any case, there are no non-builtin arrays allowed so this
++ * check should be sufficient.
++ */
++ interface_type = var->type->array_element;
++ array_length = glsl_get_length(var->type->type);
++ }
++
++ if (glsl_type_is_struct(interface_type->type)) {
++ /* It's a struct. Split it. */
++ unsigned num_members = glsl_get_length(interface_type->type);
++ var->members = ralloc_array(b, nir_variable *, num_members);
++
++ for (unsigned i = 0; i < num_members; i++) {
++ const struct glsl_type *mtype = interface_type->members[i]->type;
++ if (array_length >= 0)
++ mtype = glsl_array_type(mtype, array_length);
++
++ var->members[i] = rzalloc(b->shader, nir_variable);
++ var->members[i]->name =
++ ralloc_asprintf(var->members[i], "%s.%d", val->name, i);
++ var->members[i]->type = mtype;
++ var->members[i]->interface_type =
++ interface_type->members[i]->type;
++ var->members[i]->data.mode = nir_mode;
++ }
++ } else {
++ var->var = rzalloc(b->shader, nir_variable);
++ var->var->name = ralloc_strdup(var->var, val->name);
++ var->var->type = var->type->type;
++ var->var->interface_type = interface_type->type;
++ var->var->data.mode = nir_mode;
++ }
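++
++ /* As an example of the splitting above (names are illustrative): an
++ * input block "blk" with three members becomes separate variables
++ * "blk.0", "blk.1" and "blk.2"; in a geometry shader each of those is
++ * additionally wrapped in a per-vertex array of length array_length.
++ */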
++
++ /* For inputs and outputs, we need to grab locations and builtin
++ * information from the interface type.
++ */
++ vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var);
++ break;
++ }
++
++ case vtn_variable_mode_param:
++ unreachable("Not created through OpVariable");
++
++ case vtn_variable_mode_ubo:
++ case vtn_variable_mode_ssbo:
++ case vtn_variable_mode_push_constant:
++ /* These don't need actual variables. */
++ break;
++ }
++
++ if (count > 4) {
++ assert(count == 5);
++ nir_constant *constant =
++ vtn_value(b, w[4], vtn_value_type_constant)->constant;
++ var->var->constant_initializer =
++ nir_constant_clone(constant, var->var);
++ }
++
++ vtn_foreach_decoration(b, val, var_decoration_cb, var);
++
++ if (var->mode == vtn_variable_mode_image ||
++ var->mode == vtn_variable_mode_sampler) {
++ /* XXX: We still need the binding information in the nir_variable
++ * for these. We should fix that.
++ */
++ var->var->data.binding = var->binding;
++ var->var->data.descriptor_set = var->descriptor_set;
++
++ if (var->mode == vtn_variable_mode_image)
++ var->var->data.image.format = without_array->image_format;
++ }
++
++ if (var->mode == vtn_variable_mode_local) {
++ assert(var->members == NULL && var->var != NULL);
++ nir_function_impl_add_variable(b->impl, var->var);
++ } else if (var->var) {
++ nir_shader_add_variable(b->shader, var->var);
++ } else if (var->members) {
++ unsigned count = glsl_get_length(without_array->type);
++ for (unsigned i = 0; i < count; i++) {
++ assert(var->members[i]->data.mode != nir_var_local);
++ nir_shader_add_variable(b->shader, var->members[i]);
++ }
++ } else {
++ assert(var->mode == vtn_variable_mode_ubo ||
++ var->mode == vtn_variable_mode_ssbo ||
++ var->mode == vtn_variable_mode_push_constant);
++ }
++ break;
++ }
++
++ case SpvOpAccessChain:
++ case SpvOpInBoundsAccessChain: {
++ struct vtn_access_chain *base, *chain;
++ struct vtn_value *base_val = vtn_untyped_value(b, w[3]);
++ if (base_val->value_type == vtn_value_type_sampled_image) {
++ /* This is rather insane. SPIR-V allows you to use OpSampledImage
++ * to combine an array of images with a single sampler to get an
++ * array of sampled images that all share the same sampler.
++ * Fortunately, this means that we can more-or-less ignore the
++ * sampler when crawling the access chain, but it does leave us
++ * with this rather awkward little special-case.
++ */
++ base = base_val->sampled_image->image;
++ } else {
++ assert(base_val->value_type == vtn_value_type_access_chain);
++ base = base_val->access_chain;
++ }
++
++ chain = vtn_access_chain_extend(b, base, count - 4);
++
++ unsigned idx = base->length;
++ for (int i = 4; i < count; i++) {
++ struct vtn_value *link_val = vtn_untyped_value(b, w[i]);
++ if (link_val->value_type == vtn_value_type_constant) {
++ chain->link[idx].mode = vtn_access_mode_literal;
++ chain->link[idx].id = link_val->constant->value.u[0];
++ } else {
++ chain->link[idx].mode = vtn_access_mode_id;
++ chain->link[idx].id = w[i];
++ }
++ idx++;
++ }
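++
++ /* For illustration (hypothetical SPIR-V, not from the module being
++ * parsed): "%p = OpAccessChain %ptr_type %base %c1 %i", where %c1 is a
++ * constant 1 and %i is a dynamic value, appends a literal link with
++ * id == 1 followed by an id-mode link holding the word for %i.
++ */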
++
++ if (base_val->value_type == vtn_value_type_sampled_image) {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_sampled_image);
++ val->sampled_image = ralloc(b, struct vtn_sampled_image);
++ val->sampled_image->image = chain;
++ val->sampled_image->sampler = base_val->sampled_image->sampler;
++ } else {
++ struct vtn_value *val =
++ vtn_push_value(b, w[2], vtn_value_type_access_chain);
++ val->access_chain = chain;
++ }
++ break;
++ }
++
++ case SpvOpCopyMemory: {
++ struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain);
++ struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain);
++
++ vtn_variable_copy(b, dest->access_chain, src->access_chain);
++ break;
++ }
++
++ case SpvOpLoad: {
++ struct vtn_access_chain *src =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++
++ if (src->var->mode == vtn_variable_mode_image ||
++ src->var->mode == vtn_variable_mode_sampler) {
++ vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src;
++ return;
++ }
++
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = vtn_variable_load(b, src);
++ break;
++ }
++
++ case SpvOpStore: {
++ struct vtn_access_chain *dest =
++ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
++ struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
++ vtn_variable_store(b, src, dest);
++ break;
++ }
++
++ case SpvOpArrayLength: {
++ struct vtn_access_chain *chain =
++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
++
++ const uint32_t offset = chain->var->type->offsets[w[4]];
++ const uint32_t stride = chain->var->type->members[w[4]]->stride;
++
++ unsigned chain_idx;
++ struct vtn_type *type;
++ nir_ssa_def *index =
++ get_vulkan_resource_index(b, chain, &type, &chain_idx);
++
++ nir_intrinsic_instr *instr =
++ nir_intrinsic_instr_create(b->nb.shader,
++ nir_intrinsic_get_buffer_size);
++ instr->src[0] = nir_src_for_ssa(index);
++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++ nir_builder_instr_insert(&b->nb, &instr->instr);
++ nir_ssa_def *buf_size = &instr->dest.ssa;
++
++ /* array_length = max(buffer_size - offset, 0) / stride */
++ nir_ssa_def *array_length =
++ nir_idiv(&b->nb,
++ nir_imax(&b->nb,
++ nir_isub(&b->nb,
++ buf_size,
++ nir_imm_int(&b->nb, offset)),
++ nir_imm_int(&b->nb, 0u)),
++ nir_imm_int(&b->nb, stride));
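++
++ /* Numerical sanity check (made-up values): with buf_size = 256,
++ * offset = 16 and stride = 4, the expression above yields
++ * max(256 - 16, 0) / 4 = 60 array elements.
++ */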
++
++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
++ val->ssa = vtn_create_ssa_value(b, glsl_uint_type());
++ val->ssa->def = array_length;
++ break;
++ }
++
++ case SpvOpCopyMemorySized:
++ default:
++ unreachable("Unhandled opcode");
++ }
++}
--- /dev/null
++/*
++ * Copyright © 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ *
++ * Authors:
++ * Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++/*
++ * A simple executable that opens a SPIR-V shader, converts it to NIR, and
++ * dumps out the result. This should be useful for testing the
++ * spirv_to_nir code.
++ */
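++
++/* Typical invocation (assuming the tool is built as "spirv2nir", as the
++ * build files in this series suggest):
++ *
++ *    spirv2nir frag.spv
++ *
++ * which parses the module as a fragment shader with entry point "main"
++ * and prints the resulting NIR to stderr.
++ */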
++
++#include "spirv/nir_spirv.h"
++
++#include <assert.h>
++#include <stdio.h>
++#include <sys/mman.h>
++#include <sys/types.h>
++#include <fcntl.h>
++#include <unistd.h>
++
++int main(int argc, char **argv)
++{
++ int fd = open(argv[1], O_RDONLY);
++ off_t len = lseek(fd, 0, SEEK_END);
++
++ assert(len % 4 == 0);
++ size_t word_count = len / 4;
++
++ const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
++ assert(map != MAP_FAILED);
++
++ nir_function *func = spirv_to_nir(map, word_count, NULL, 0,
++ MESA_SHADER_FRAGMENT, "main", NULL);
++ nir_print_shader(func->shader, stderr);
++}
--- /dev/null
+ /*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #include "nir_types.h"
+ #include "compiler/glsl/ir.h"
+
+ void
+ glsl_print_type(const glsl_type *type, FILE *fp)
+ {
+ if (type->base_type == GLSL_TYPE_ARRAY) {
+ glsl_print_type(type->fields.array, fp);
+ fprintf(fp, "[%u]", type->length);
+ } else if ((type->base_type == GLSL_TYPE_STRUCT)
+ && !is_gl_identifier(type->name)) {
+ fprintf(fp, "%s@%p", type->name, (void *) type);
+ } else {
+ fprintf(fp, "%s", type->name);
+ }
+ }
+
+ void
+ glsl_print_struct(const glsl_type *type, FILE *fp)
+ {
+ assert(type->base_type == GLSL_TYPE_STRUCT);
+
+ fprintf(fp, "struct {\n");
+ for (unsigned i = 0; i < type->length; i++) {
+ fprintf(fp, "\t");
+ glsl_print_type(type->fields.structure[i].type, fp);
+ fprintf(fp, " %s;\n", type->fields.structure[i].name);
+ }
+ fprintf(fp, "}\n");
+ }
+
+ const glsl_type *
+ glsl_get_array_element(const glsl_type* type)
+ {
+ if (type->is_matrix())
+ return type->column_type();
+ return type->fields.array;
+ }
+
+ const glsl_type *
+ glsl_get_struct_field(const glsl_type *type, unsigned index)
+ {
+ return type->fields.structure[index].type;
+ }
+
++const glsl_type *
++glsl_get_function_return_type(const glsl_type *type)
++{
++ return type->fields.parameters[0].type;
++}
++
++const glsl_function_param *
++glsl_get_function_param(const glsl_type *type, unsigned index)
++{
++ return &type->fields.parameters[index + 1];
++}
++
+ const struct glsl_type *
+ glsl_get_column_type(const struct glsl_type *type)
+ {
+ return type->column_type();
+ }
+
+ enum glsl_base_type
+ glsl_get_base_type(const struct glsl_type *type)
+ {
+ return type->base_type;
+ }
+
+ unsigned
+ glsl_get_vector_elements(const struct glsl_type *type)
+ {
+ return type->vector_elements;
+ }
+
+ unsigned
+ glsl_get_components(const struct glsl_type *type)
+ {
+ return type->components();
+ }
+
+ unsigned
+ glsl_get_matrix_columns(const struct glsl_type *type)
+ {
+ return type->matrix_columns;
+ }
+
+ unsigned
+ glsl_get_length(const struct glsl_type *type)
+ {
+ return type->is_matrix() ? type->matrix_columns : type->length;
+ }
+
+ unsigned
+ glsl_get_aoa_size(const struct glsl_type *type)
+ {
+ return type->arrays_of_arrays_size();
+ }
+
++unsigned
++glsl_count_attribute_slots(const struct glsl_type *type,
++ bool vertex_input_slots)
++{
++ return type->count_attribute_slots(vertex_input_slots);
++}
++
+ const char *
+ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
+ {
+ return type->fields.structure[index].name;
+ }
+
++glsl_sampler_dim
++glsl_get_sampler_dim(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
++ return (glsl_sampler_dim)type->sampler_dimensionality;
++}
++
++glsl_base_type
++glsl_get_sampler_result_type(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
++ return (glsl_base_type)type->sampler_type;
++}
++
+ unsigned
+ glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length)
+ {
+ return type->record_location_offset(length);
+ }
+
+ bool
+ glsl_type_is_void(const glsl_type *type)
+ {
+ return type->is_void();
+ }
+
++bool
++glsl_type_is_error(const glsl_type *type)
++{
++ return type->is_error();
++}
++
+ bool
+ glsl_type_is_vector(const struct glsl_type *type)
+ {
+ return type->is_vector();
+ }
+
+ bool
+ glsl_type_is_scalar(const struct glsl_type *type)
+ {
+ return type->is_scalar();
+ }
+
+ bool
+ glsl_type_is_vector_or_scalar(const struct glsl_type *type)
+ {
+ return type->is_vector() || type->is_scalar();
+ }
+
+ bool
+ glsl_type_is_matrix(const struct glsl_type *type)
+ {
+ return type->is_matrix();
+ }
+
++bool
++glsl_type_is_array(const struct glsl_type *type)
++{
++ return type->is_array();
++}
++
++bool
++glsl_type_is_struct(const struct glsl_type *type)
++{
++ return type->is_record() || type->is_interface();
++}
++
++bool
++glsl_type_is_sampler(const struct glsl_type *type)
++{
++ return type->is_sampler();
++}
++
++bool
++glsl_type_is_image(const struct glsl_type *type)
++{
++ return type->is_image();
++}
++
++bool
++glsl_sampler_type_is_shadow(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type));
++ return type->sampler_shadow;
++}
++
++bool
++glsl_sampler_type_is_array(const struct glsl_type *type)
++{
++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
++ return type->sampler_array;
++}
++
+ const glsl_type *
+ glsl_void_type(void)
+ {
+ return glsl_type::void_type;
+ }
+
+ const glsl_type *
+ glsl_float_type(void)
+ {
+ return glsl_type::float_type;
+ }
+
+ const glsl_type *
+ glsl_vec_type(unsigned n)
+ {
+ return glsl_type::vec(n);
+ }
+
+ const glsl_type *
+ glsl_vec4_type(void)
+ {
+ return glsl_type::vec4_type;
+ }
+
++const glsl_type *
++glsl_int_type(void)
++{
++ return glsl_type::int_type;
++}
++
+ const glsl_type *
+ glsl_uint_type(void)
+ {
+ return glsl_type::uint_type;
+ }
+
++const glsl_type *
++glsl_bool_type(void)
++{
++ return glsl_type::bool_type;
++}
++
++const glsl_type *
++glsl_scalar_type(enum glsl_base_type base_type)
++{
++ return glsl_type::get_instance(base_type, 1, 1);
++}
++
++const glsl_type *
++glsl_vector_type(enum glsl_base_type base_type, unsigned components)
++{
++ assert(components > 1 && components <= 4);
++ return glsl_type::get_instance(base_type, components, 1);
++}
++
++const glsl_type *
++glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
++{
++ assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4);
++ return glsl_type::get_instance(base_type, rows, columns);
++}
++
+ const glsl_type *
+ glsl_array_type(const glsl_type *base, unsigned elements)
+ {
+ return glsl_type::get_array_instance(base, elements);
+ }
++
++const glsl_type *
++glsl_struct_type(const glsl_struct_field *fields,
++ unsigned num_fields, const char *name)
++{
++ return glsl_type::get_record_instance(fields, num_fields, name);
++}
++
++const struct glsl_type *
++glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
++ enum glsl_base_type base_type)
++{
++ return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
++}
++
++const struct glsl_type *
++glsl_image_type(enum glsl_sampler_dim dim, bool is_array,
++ enum glsl_base_type base_type)
++{
++ return glsl_type::get_image_instance(dim, is_array, base_type);
++}
++
++const glsl_type *
++glsl_function_type(const glsl_type *return_type,
++ const glsl_function_param *params, unsigned num_params)
++{
++ return glsl_type::get_function_instance(return_type, params, num_params);
++}
++
++const glsl_type *
++glsl_transposed_type(const struct glsl_type *type)
++{
++ return glsl_type::get_instance(type->base_type, type->matrix_columns,
++ type->vector_elements);
++}
--- /dev/null
+ /*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+ #pragma once
+
+ #include <stdio.h>
+ #include <stdbool.h>
+
+ /* C wrapper around compiler/glsl_types.h */
+
+ #include "glsl_types.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #else
+ struct glsl_type;
+ #endif
+
+ void glsl_print_type(const struct glsl_type *type, FILE *fp);
+ void glsl_print_struct(const struct glsl_type *type, FILE *fp);
+
+ const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type,
+ unsigned index);
+
+ const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
+
+ const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
+
++const struct glsl_type *
++glsl_get_function_return_type(const struct glsl_type *type);
++
++const struct glsl_function_param *
++glsl_get_function_param(const struct glsl_type *type, unsigned index);
++
+ enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
+
+ unsigned glsl_get_vector_elements(const struct glsl_type *type);
+
+ unsigned glsl_get_components(const struct glsl_type *type);
+
+ unsigned glsl_get_matrix_columns(const struct glsl_type *type);
+
+ unsigned glsl_get_length(const struct glsl_type *type);
+
+ unsigned glsl_get_aoa_size(const struct glsl_type *type);
+
++unsigned glsl_count_attribute_slots(const struct glsl_type *type,
++ bool vertex_input_slots);
++
+ const char *glsl_get_struct_elem_name(const struct glsl_type *type,
+ unsigned index);
+
++enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
++enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
++
+ unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length);
+
+ bool glsl_type_is_void(const struct glsl_type *type);
++bool glsl_type_is_error(const struct glsl_type *type);
+ bool glsl_type_is_vector(const struct glsl_type *type);
+ bool glsl_type_is_scalar(const struct glsl_type *type);
+ bool glsl_type_is_vector_or_scalar(const struct glsl_type *type);
+ bool glsl_type_is_matrix(const struct glsl_type *type);
++bool glsl_type_is_array(const struct glsl_type *type);
++bool glsl_type_is_struct(const struct glsl_type *type);
++bool glsl_type_is_sampler(const struct glsl_type *type);
++bool glsl_type_is_image(const struct glsl_type *type);
++bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
++bool glsl_sampler_type_is_array(const struct glsl_type *type);
+
+ const struct glsl_type *glsl_void_type(void);
+ const struct glsl_type *glsl_float_type(void);
+ const struct glsl_type *glsl_vec_type(unsigned n);
+ const struct glsl_type *glsl_vec4_type(void);
++const struct glsl_type *glsl_int_type(void);
+ const struct glsl_type *glsl_uint_type(void);
++const struct glsl_type *glsl_bool_type(void);
++
++const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type);
++const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type,
++ unsigned components);
++const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type,
++ unsigned rows, unsigned columns);
+ const struct glsl_type *glsl_array_type(const struct glsl_type *base,
+ unsigned elements);
++const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
++ unsigned num_fields, const char *name);
++const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
++ bool is_shadow, bool is_array,
++ enum glsl_base_type base_type);
++const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim,
++ bool is_array,
++ enum glsl_base_type base_type);
++const struct glsl_type * glsl_function_type(const struct glsl_type *return_type,
++ const struct glsl_function_param *params,
++ unsigned num_params);
++
++const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
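++
++/* Minimal usage sketch for the constructors above (caller code is
++ * hypothetical): building the type of "mat3 m[4]" from C looks like
++ *
++ *    const struct glsl_type *mat3 = glsl_matrix_type(GLSL_TYPE_FLOAT, 3, 3);
++ *    const struct glsl_type *arr  = glsl_array_type(mat3, 4);
++ */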
+
+ #ifdef __cplusplus
+ }
+ #endif
--- /dev/null
+ /*
+ * Mesa 3-D graphics library
+ *
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+ #include "shader_enums.h"
+ #include "util/macros.h"
+ #include "mesa/main/config.h"
+
+ #define ENUM(x) [x] = #x
+ #define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
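+
+ /* e.g. NAME(MESA_SHADER_VERTEX) yields "MESA_SHADER_VERTEX" with the
+ * names table below; indices without an entry fall back to "UNKNOWN".
+ */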
+
+ const char *
+ gl_shader_stage_name(gl_shader_stage stage)
+ {
+ static const char *names[] = {
+ ENUM(MESA_SHADER_VERTEX),
+ ENUM(MESA_SHADER_TESS_CTRL),
+ ENUM(MESA_SHADER_TESS_EVAL),
+ ENUM(MESA_SHADER_GEOMETRY),
+ ENUM(MESA_SHADER_FRAGMENT),
+ ENUM(MESA_SHADER_COMPUTE),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == MESA_SHADER_STAGES);
+ return NAME(stage);
+ }
+
+ /**
+ * Translate a gl_shader_stage to a short shader stage name for debug
+ * printouts and error messages.
+ */
+ const char *
+ _mesa_shader_stage_to_string(unsigned stage)
+ {
+ switch (stage) {
+ case MESA_SHADER_VERTEX: return "vertex";
+ case MESA_SHADER_FRAGMENT: return "fragment";
+ case MESA_SHADER_GEOMETRY: return "geometry";
+ case MESA_SHADER_COMPUTE: return "compute";
+ case MESA_SHADER_TESS_CTRL: return "tessellation control";
+ case MESA_SHADER_TESS_EVAL: return "tessellation evaluation";
+ }
+
+ unreachable("Unknown shader stage.");
+ }
+
+ /**
+ * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
+ * for debug printouts and error messages.
+ */
+ const char *
+ _mesa_shader_stage_to_abbrev(unsigned stage)
+ {
+ switch (stage) {
+ case MESA_SHADER_VERTEX: return "VS";
+ case MESA_SHADER_FRAGMENT: return "FS";
+ case MESA_SHADER_GEOMETRY: return "GS";
+ case MESA_SHADER_COMPUTE: return "CS";
+ case MESA_SHADER_TESS_CTRL: return "TCS";
+ case MESA_SHADER_TESS_EVAL: return "TES";
+ }
+
+ unreachable("Unknown shader stage.");
+ }
+
+ const char *
+ gl_vert_attrib_name(gl_vert_attrib attrib)
+ {
+ static const char *names[] = {
+ ENUM(VERT_ATTRIB_POS),
+ ENUM(VERT_ATTRIB_WEIGHT),
+ ENUM(VERT_ATTRIB_NORMAL),
+ ENUM(VERT_ATTRIB_COLOR0),
+ ENUM(VERT_ATTRIB_COLOR1),
+ ENUM(VERT_ATTRIB_FOG),
+ ENUM(VERT_ATTRIB_COLOR_INDEX),
+ ENUM(VERT_ATTRIB_EDGEFLAG),
+ ENUM(VERT_ATTRIB_TEX0),
+ ENUM(VERT_ATTRIB_TEX1),
+ ENUM(VERT_ATTRIB_TEX2),
+ ENUM(VERT_ATTRIB_TEX3),
+ ENUM(VERT_ATTRIB_TEX4),
+ ENUM(VERT_ATTRIB_TEX5),
+ ENUM(VERT_ATTRIB_TEX6),
+ ENUM(VERT_ATTRIB_TEX7),
+ ENUM(VERT_ATTRIB_POINT_SIZE),
+ ENUM(VERT_ATTRIB_GENERIC0),
+ ENUM(VERT_ATTRIB_GENERIC1),
+ ENUM(VERT_ATTRIB_GENERIC2),
+ ENUM(VERT_ATTRIB_GENERIC3),
+ ENUM(VERT_ATTRIB_GENERIC4),
+ ENUM(VERT_ATTRIB_GENERIC5),
+ ENUM(VERT_ATTRIB_GENERIC6),
+ ENUM(VERT_ATTRIB_GENERIC7),
+ ENUM(VERT_ATTRIB_GENERIC8),
+ ENUM(VERT_ATTRIB_GENERIC9),
+ ENUM(VERT_ATTRIB_GENERIC10),
+ ENUM(VERT_ATTRIB_GENERIC11),
+ ENUM(VERT_ATTRIB_GENERIC12),
+ ENUM(VERT_ATTRIB_GENERIC13),
+ ENUM(VERT_ATTRIB_GENERIC14),
+ ENUM(VERT_ATTRIB_GENERIC15),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == VERT_ATTRIB_MAX);
+ return NAME(attrib);
+ }
+
+ const char *
+ gl_varying_slot_name(gl_varying_slot slot)
+ {
+ static const char *names[] = {
+ ENUM(VARYING_SLOT_POS),
+ ENUM(VARYING_SLOT_COL0),
+ ENUM(VARYING_SLOT_COL1),
+ ENUM(VARYING_SLOT_FOGC),
+ ENUM(VARYING_SLOT_TEX0),
+ ENUM(VARYING_SLOT_TEX1),
+ ENUM(VARYING_SLOT_TEX2),
+ ENUM(VARYING_SLOT_TEX3),
+ ENUM(VARYING_SLOT_TEX4),
+ ENUM(VARYING_SLOT_TEX5),
+ ENUM(VARYING_SLOT_TEX6),
+ ENUM(VARYING_SLOT_TEX7),
+ ENUM(VARYING_SLOT_PSIZ),
+ ENUM(VARYING_SLOT_BFC0),
+ ENUM(VARYING_SLOT_BFC1),
+ ENUM(VARYING_SLOT_EDGE),
+ ENUM(VARYING_SLOT_CLIP_VERTEX),
+ ENUM(VARYING_SLOT_CLIP_DIST0),
+ ENUM(VARYING_SLOT_CLIP_DIST1),
+ ENUM(VARYING_SLOT_PRIMITIVE_ID),
+ ENUM(VARYING_SLOT_LAYER),
+ ENUM(VARYING_SLOT_VIEWPORT),
+ ENUM(VARYING_SLOT_FACE),
+ ENUM(VARYING_SLOT_PNTC),
+ ENUM(VARYING_SLOT_TESS_LEVEL_OUTER),
+ ENUM(VARYING_SLOT_TESS_LEVEL_INNER),
+ ENUM(VARYING_SLOT_VAR0),
+ ENUM(VARYING_SLOT_VAR1),
+ ENUM(VARYING_SLOT_VAR2),
+ ENUM(VARYING_SLOT_VAR3),
+ ENUM(VARYING_SLOT_VAR4),
+ ENUM(VARYING_SLOT_VAR5),
+ ENUM(VARYING_SLOT_VAR6),
+ ENUM(VARYING_SLOT_VAR7),
+ ENUM(VARYING_SLOT_VAR8),
+ ENUM(VARYING_SLOT_VAR9),
+ ENUM(VARYING_SLOT_VAR10),
+ ENUM(VARYING_SLOT_VAR11),
+ ENUM(VARYING_SLOT_VAR12),
+ ENUM(VARYING_SLOT_VAR13),
+ ENUM(VARYING_SLOT_VAR14),
+ ENUM(VARYING_SLOT_VAR15),
+ ENUM(VARYING_SLOT_VAR16),
+ ENUM(VARYING_SLOT_VAR17),
+ ENUM(VARYING_SLOT_VAR18),
+ ENUM(VARYING_SLOT_VAR19),
+ ENUM(VARYING_SLOT_VAR20),
+ ENUM(VARYING_SLOT_VAR21),
+ ENUM(VARYING_SLOT_VAR22),
+ ENUM(VARYING_SLOT_VAR23),
+ ENUM(VARYING_SLOT_VAR24),
+ ENUM(VARYING_SLOT_VAR25),
+ ENUM(VARYING_SLOT_VAR26),
+ ENUM(VARYING_SLOT_VAR27),
+ ENUM(VARYING_SLOT_VAR28),
+ ENUM(VARYING_SLOT_VAR29),
+ ENUM(VARYING_SLOT_VAR30),
+ ENUM(VARYING_SLOT_VAR31),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == VARYING_SLOT_MAX);
+ return NAME(slot);
+ }
+
+ const char *
+ gl_system_value_name(gl_system_value sysval)
+ {
+ static const char *names[] = {
+ ENUM(SYSTEM_VALUE_VERTEX_ID),
+ ENUM(SYSTEM_VALUE_INSTANCE_ID),
++ ENUM(SYSTEM_VALUE_INSTANCE_INDEX),
+ ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE),
+ ENUM(SYSTEM_VALUE_BASE_VERTEX),
+ ENUM(SYSTEM_VALUE_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_FRONT_FACE),
+ ENUM(SYSTEM_VALUE_SAMPLE_ID),
+ ENUM(SYSTEM_VALUE_SAMPLE_POS),
+ ENUM(SYSTEM_VALUE_SAMPLE_MASK_IN),
+ ENUM(SYSTEM_VALUE_TESS_COORD),
+ ENUM(SYSTEM_VALUE_VERTICES_IN),
+ ENUM(SYSTEM_VALUE_PRIMITIVE_ID),
+ ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER),
+ ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
+ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
++ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX),
++ ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+ ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
+ ENUM(SYSTEM_VALUE_VERTEX_CNT),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
+ return NAME(sysval);
+ }
+
+ const char *
+ glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
+ {
+ static const char *names[] = {
+ ENUM(INTERP_QUALIFIER_NONE),
+ ENUM(INTERP_QUALIFIER_SMOOTH),
+ ENUM(INTERP_QUALIFIER_FLAT),
+ ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == INTERP_QUALIFIER_COUNT);
+ return NAME(qual);
+ }
+
+ const char *
+ gl_frag_result_name(gl_frag_result result)
+ {
+ static const char *names[] = {
+ ENUM(FRAG_RESULT_DEPTH),
+ ENUM(FRAG_RESULT_STENCIL),
+ ENUM(FRAG_RESULT_COLOR),
+ ENUM(FRAG_RESULT_SAMPLE_MASK),
+ ENUM(FRAG_RESULT_DATA0),
+ ENUM(FRAG_RESULT_DATA1),
+ ENUM(FRAG_RESULT_DATA2),
+ ENUM(FRAG_RESULT_DATA3),
+ ENUM(FRAG_RESULT_DATA4),
+ ENUM(FRAG_RESULT_DATA5),
+ ENUM(FRAG_RESULT_DATA6),
+ ENUM(FRAG_RESULT_DATA7),
+ };
+ STATIC_ASSERT(ARRAY_SIZE(names) == FRAG_RESULT_MAX);
+ return NAME(result);
+ }
--- /dev/null
+ /*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ #ifndef SHADER_ENUMS_H
+ #define SHADER_ENUMS_H
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /**
+ * Shader stages.
+ *
+ * The order must match how shaders are ordered in the pipeline.
+ * The GLSL linker assumes that if i<j, then the j-th shader is
+ * executed later than the i-th shader.
+ */
+ typedef enum
+ {
+ MESA_SHADER_VERTEX = 0,
+ MESA_SHADER_TESS_CTRL = 1,
+ MESA_SHADER_TESS_EVAL = 2,
+ MESA_SHADER_GEOMETRY = 3,
+ MESA_SHADER_FRAGMENT = 4,
+ MESA_SHADER_COMPUTE = 5,
+ } gl_shader_stage;
+
+ const char *gl_shader_stage_name(gl_shader_stage stage);
+
+ /**
+ * Translate a gl_shader_stage to a short shader stage name for debug
+ * printouts and error messages.
+ */
+ const char *_mesa_shader_stage_to_string(unsigned stage);
+
+ /**
+ * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
+ * for debug printouts and error messages.
+ */
+ const char *_mesa_shader_stage_to_abbrev(unsigned stage);
+
+ #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
+
+
+ /**
+ * Indexes for vertex program attributes.
+ * GL_NV_vertex_program aliases generic attributes over the conventional
+ * attributes. In GL_ARB_vertex_program shader the aliasing is optional.
+ * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
+ * generic attributes are distinct/separate).
+ */
+ typedef enum
+ {
+ VERT_ATTRIB_POS = 0,
+ VERT_ATTRIB_WEIGHT = 1,
+ VERT_ATTRIB_NORMAL = 2,
+ VERT_ATTRIB_COLOR0 = 3,
+ VERT_ATTRIB_COLOR1 = 4,
+ VERT_ATTRIB_FOG = 5,
+ VERT_ATTRIB_COLOR_INDEX = 6,
+ VERT_ATTRIB_EDGEFLAG = 7,
+ VERT_ATTRIB_TEX0 = 8,
+ VERT_ATTRIB_TEX1 = 9,
+ VERT_ATTRIB_TEX2 = 10,
+ VERT_ATTRIB_TEX3 = 11,
+ VERT_ATTRIB_TEX4 = 12,
+ VERT_ATTRIB_TEX5 = 13,
+ VERT_ATTRIB_TEX6 = 14,
+ VERT_ATTRIB_TEX7 = 15,
+ VERT_ATTRIB_POINT_SIZE = 16,
+ VERT_ATTRIB_GENERIC0 = 17,
+ VERT_ATTRIB_GENERIC1 = 18,
+ VERT_ATTRIB_GENERIC2 = 19,
+ VERT_ATTRIB_GENERIC3 = 20,
+ VERT_ATTRIB_GENERIC4 = 21,
+ VERT_ATTRIB_GENERIC5 = 22,
+ VERT_ATTRIB_GENERIC6 = 23,
+ VERT_ATTRIB_GENERIC7 = 24,
+ VERT_ATTRIB_GENERIC8 = 25,
+ VERT_ATTRIB_GENERIC9 = 26,
+ VERT_ATTRIB_GENERIC10 = 27,
+ VERT_ATTRIB_GENERIC11 = 28,
+ VERT_ATTRIB_GENERIC12 = 29,
+ VERT_ATTRIB_GENERIC13 = 30,
+ VERT_ATTRIB_GENERIC14 = 31,
+ VERT_ATTRIB_GENERIC15 = 32,
+ VERT_ATTRIB_MAX = 33
+ } gl_vert_attrib;
+
+ const char *gl_vert_attrib_name(gl_vert_attrib attrib);
+
+ /**
+ * Symbolic constants to help iterate over
+ * specific blocks of vertex attributes.
+ *
+ * VERT_ATTRIB_FF
+ * includes all fixed function attributes as well as
+ * the aliased GL_NV_vertex_program shader attributes.
+ * VERT_ATTRIB_TEX
+ * include the classic texture coordinate attributes.
+ * Is a subset of VERT_ATTRIB_FF.
+ * VERT_ATTRIB_GENERIC
+ * include the OpenGL 2.0+ GLSL generic shader attributes.
+ * These alias the generic GL_ARB_vertex_shader attributes.
+ */
+ #define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i))
+ #define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0
+
+ #define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i))
+ #define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS
+
+ #define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i))
+ #define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS
+
+ /**
+ * Bitflags for vertex attributes.
+ * These are used in bitfields in many places.
+ */
+ /*@{*/
+ #define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS)
+ #define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT)
+ #define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL)
+ #define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0)
+ #define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1)
+ #define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG)
+ #define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX)
+ #define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)
+ #define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0)
+ #define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1)
+ #define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2)
+ #define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3)
+ #define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4)
+ #define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5)
+ #define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6)
+ #define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7)
+ #define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE)
+ #define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0)
+
+ #define VERT_BIT(i) BITFIELD64_BIT(i)
+ #define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX)
+
+ #define VERT_BIT_FF(i) VERT_BIT(i)
+ #define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX)
+ #define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i))
+ #define VERT_BIT_TEX_ALL \
+ BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX)
+
+ #define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i))
+ #define VERT_BIT_GENERIC_ALL \
+ BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX)
+ /*@}*/
+
+
+ /**
+ * Indexes for vertex shader outputs, geometry shader inputs/outputs, and
+ * fragment shader inputs.
+ *
+ * Note that some of these values are not available to all pipeline stages.
+ *
+ * When this enum is updated, the following code must be updated too:
+ * - vertResults (in prog_print.c's arb_output_attrib_string())
+ * - fragAttribs (in prog_print.c's arb_input_attrib_string())
+ * - _mesa_varying_slot_in_fs()
+ */
+ typedef enum
+ {
+ VARYING_SLOT_POS,
+ VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */
+ VARYING_SLOT_COL1,
+ VARYING_SLOT_FOGC,
+ VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */
+ VARYING_SLOT_TEX1,
+ VARYING_SLOT_TEX2,
+ VARYING_SLOT_TEX3,
+ VARYING_SLOT_TEX4,
+ VARYING_SLOT_TEX5,
+ VARYING_SLOT_TEX6,
+ VARYING_SLOT_TEX7,
+ VARYING_SLOT_PSIZ, /* Does not appear in FS */
+ VARYING_SLOT_BFC0, /* Does not appear in FS */
+ VARYING_SLOT_BFC1, /* Does not appear in FS */
+ VARYING_SLOT_EDGE, /* Does not appear in FS */
+ VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */
+ VARYING_SLOT_CLIP_DIST0,
+ VARYING_SLOT_CLIP_DIST1,
+ VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */
+ VARYING_SLOT_LAYER, /* Appears as VS or GS output */
+ VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */
+ VARYING_SLOT_FACE, /* FS only */
+ VARYING_SLOT_PNTC, /* FS only */
+ VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
+ VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
+ VARYING_SLOT_VAR0, /* First generic varying slot */
+ /* the remaining are simply for the benefit of gl_varying_slot_name()
+ * and not to be construed as an upper bound:
+ */
+ VARYING_SLOT_VAR1,
+ VARYING_SLOT_VAR2,
+ VARYING_SLOT_VAR3,
+ VARYING_SLOT_VAR4,
+ VARYING_SLOT_VAR5,
+ VARYING_SLOT_VAR6,
+ VARYING_SLOT_VAR7,
+ VARYING_SLOT_VAR8,
+ VARYING_SLOT_VAR9,
+ VARYING_SLOT_VAR10,
+ VARYING_SLOT_VAR11,
+ VARYING_SLOT_VAR12,
+ VARYING_SLOT_VAR13,
+ VARYING_SLOT_VAR14,
+ VARYING_SLOT_VAR15,
+ VARYING_SLOT_VAR16,
+ VARYING_SLOT_VAR17,
+ VARYING_SLOT_VAR18,
+ VARYING_SLOT_VAR19,
+ VARYING_SLOT_VAR20,
+ VARYING_SLOT_VAR21,
+ VARYING_SLOT_VAR22,
+ VARYING_SLOT_VAR23,
+ VARYING_SLOT_VAR24,
+ VARYING_SLOT_VAR25,
+ VARYING_SLOT_VAR26,
+ VARYING_SLOT_VAR27,
+ VARYING_SLOT_VAR28,
+ VARYING_SLOT_VAR29,
+ VARYING_SLOT_VAR30,
+ VARYING_SLOT_VAR31,
+ } gl_varying_slot;
+
+
+ #define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
+ #define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
+ #define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
+
+ const char *gl_varying_slot_name(gl_varying_slot slot);
+
+ /**
+ * Bitflags for varying slots.
+ */
+ /*@{*/
+ #define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS)
+ #define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0)
+ #define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1)
+ #define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC)
+ #define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0)
+ #define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1)
+ #define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2)
+ #define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3)
+ #define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4)
+ #define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5)
+ #define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6)
+ #define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7)
+ #define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U))
+ #define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \
+ MAX_TEXTURE_COORD_UNITS)
+ #define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
+ #define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
+ #define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
+ #define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
+ #define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
+ #define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
+ #define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)
+ #define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID)
+ #define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER)
+ #define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)
+ #define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE)
+ #define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC)
+ #define VARYING_BIT_TESS_LEVEL_OUTER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER)
+ #define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER)
+ #define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
+ /*@}*/
+
+ /**
+ * Bitflags for system values.
+ */
+ #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
+ #define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
+ #define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+ #define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << SYSTEM_VALUE_LOCAL_INVOCATION_ID)
+
+ /**
+ * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
+ * one of these values. If a NIR variable's mode is nir_var_system_value, it
+ * will be one of these values.
+ */
+ typedef enum
+ {
+ /**
+ * \name Vertex shader system values
+ */
+ /*@{*/
+ /**
+ * OpenGL-style vertex ID.
+ *
+ * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the
+ * OpenGL 3.3 core profile spec says:
+ *
+ * "gl_VertexID holds the integer index i implicitly passed by
+ * DrawArrays or one of the other drawing commands defined in section
+ * 2.8.3."
+ *
+ * Section 2.8.3 (Drawing Commands) of the same spec says:
+ *
+ * "The commands....are equivalent to the commands with the same base
+ * name (without the BaseVertex suffix), except that the ith element
+ * transferred by the corresponding draw call will be taken from
+ * element indices[i] + basevertex of each enabled array."
+ *
+ * Additionally, the overview in the GL_ARB_shader_draw_parameters spec
+ * says:
+ *
+ * "In unextended GL, vertex shaders have inputs named gl_VertexID and
+ * gl_InstanceID, which contain, respectively the index of the vertex
+ * and instance. The value of gl_VertexID is the implicitly passed
+ * index of the vertex being processed, which includes the value of
+ * baseVertex, for those commands that accept it."
+ *
+ * gl_VertexID gets basevertex added in. This differs from DirectX where
+ * SV_VertexID does \b not get basevertex added in.
+ *
+ * \note
+ * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be
+ * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus
+ * \c SYSTEM_VALUE_BASE_VERTEX.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX
+ */
+ SYSTEM_VALUE_VERTEX_ID,
+
+ /**
+ * Instanced ID as supplied to gl_InstanceID
+ *
+ * Values assigned to gl_InstanceID always begin with zero, regardless of
+ * the value of baseinstance.
+ *
+ * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec
+ * says:
+ *
+ * "gl_InstanceID holds the integer instance number of the current
+ * primitive in an instanced draw call (see section 10.5)."
+ *
+ * Through a big chain of pseudocode, section 10.5 describes that
+ * baseinstance is not counted by gl_InstanceID. In that section, notice
+ *
+ * "If an enabled vertex attribute array is instanced (it has a
+ * non-zero divisor as specified by VertexAttribDivisor), the element
+ * index that is transferred to the GL, for all vertices, is given by
+ *
+ * floor(instance/divisor) + baseinstance
+ *
+ * If an array corresponding to an attribute required by a vertex
+ * shader is not enabled, then the corresponding element is taken from
+ * the current attribute state (see section 10.2)."
+ *
+ * Note that baseinstance is \b not included in the value of instance.
+ */
+ SYSTEM_VALUE_INSTANCE_ID,
+
++ /**
++ * Vulkan InstanceIndex.
++ *
++ * InstanceIndex = gl_InstanceID + gl_BaseInstance
++ */
++ SYSTEM_VALUE_INSTANCE_INDEX,
++
+ /**
+ * DirectX-style vertex ID.
+ *
+ * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include
+ * the value of basevertex.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX
+ */
+ SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+
+ /**
+ * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar
+ * functions.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
+ */
+ SYSTEM_VALUE_BASE_VERTEX,
+
+ /**
+ * Value of \c baseinstance passed to instanced draw entry points
+ *
+ * \sa SYSTEM_VALUE_INSTANCE_ID
+ */
+ SYSTEM_VALUE_BASE_INSTANCE,
+
+ /**
+ * From _ARB_shader_draw_parameters:
+ *
+ * "Additionally, this extension adds a further built-in variable,
+ * gl_DrawID to the shading language. This variable contains the index
+ * of the draw currently being processed by a Multi* variant of a
+ * drawing command (such as MultiDrawElements or
+ * MultiDrawArraysIndirect)."
+ *
+ * If GL_ARB_multi_draw_indirect is not supported, this is always 0.
+ */
+ SYSTEM_VALUE_DRAW_ID,
+ /*@}*/
+
+ /**
+ * \name Geometry shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_INVOCATION_ID, /**< (Also in Tessellation Control shader) */
+ /*@}*/
+
+ /**
+ * \name Fragment shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_FRAG_COORD,
+ SYSTEM_VALUE_FRONT_FACE,
+ SYSTEM_VALUE_SAMPLE_ID,
+ SYSTEM_VALUE_SAMPLE_POS,
+ SYSTEM_VALUE_SAMPLE_MASK_IN,
+ SYSTEM_VALUE_HELPER_INVOCATION,
+ /*@}*/
+
+ /**
+ * \name Tessellation Evaluation shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_TESS_COORD,
+ SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */
+ SYSTEM_VALUE_PRIMITIVE_ID,
+ SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
+ SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
+ /*@}*/
+
+ /**
+ * \name Compute shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_LOCAL_INVOCATION_ID,
++ SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
++ SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+ SYSTEM_VALUE_WORK_GROUP_ID,
+ SYSTEM_VALUE_NUM_WORK_GROUPS,
+ /*@}*/
+
+ /**
+ * Driver internal vertex-count, used (for example) for drivers to
+ * calculate stride for stream-out outputs. Not externally visible.
+ */
+ SYSTEM_VALUE_VERTEX_CNT,
+
+ SYSTEM_VALUE_MAX /**< Number of values */
+ } gl_system_value;
+
+ const char *gl_system_value_name(gl_system_value sysval);
+
+ /**
+ * The possible interpolation qualifiers that can be applied to a fragment
+ * shader input in GLSL.
+ *
+ * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the
+ * gl_fragment_program data structure to 0 causes the default behavior.
+ */
+ enum glsl_interp_qualifier
+ {
+ INTERP_QUALIFIER_NONE = 0,
+ INTERP_QUALIFIER_SMOOTH,
+ INTERP_QUALIFIER_FLAT,
+ INTERP_QUALIFIER_NOPERSPECTIVE,
+ INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
+ };
+
+ const char *glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
+
+ /**
+ * Fragment program results
+ */
+ typedef enum
+ {
+ FRAG_RESULT_DEPTH = 0,
+ FRAG_RESULT_STENCIL = 1,
+ /* If a single color should be written to all render targets, this
+ * register is written. No FRAG_RESULT_DATAn will be written.
+ */
+ FRAG_RESULT_COLOR = 2,
+ FRAG_RESULT_SAMPLE_MASK = 3,
+
+ /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
+ * or ARB_fragment_program fragment.color[n]) color results. If
+ * any are written, FRAG_RESULT_COLOR will not be written.
+ * FRAG_RESULT_DATA1 and up are simply for the benefit of
+ * gl_frag_result_name() and not to be construed as an upper bound
+ */
+ FRAG_RESULT_DATA0 = 4,
+ FRAG_RESULT_DATA1,
+ FRAG_RESULT_DATA2,
+ FRAG_RESULT_DATA3,
+ FRAG_RESULT_DATA4,
+ FRAG_RESULT_DATA5,
+ FRAG_RESULT_DATA6,
+ FRAG_RESULT_DATA7,
+ } gl_frag_result;
+
+ const char *gl_frag_result_name(gl_frag_result result);
+
+ #define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+
+ /**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
+ * a layout qualifier.
+ *
+ * \see enum ir_depth_layout
+ */
+ enum gl_frag_depth_layout
+ {
+ FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
+ FRAG_DEPTH_LAYOUT_ANY,
+ FRAG_DEPTH_LAYOUT_GREATER,
+ FRAG_DEPTH_LAYOUT_LESS,
+ FRAG_DEPTH_LAYOUT_UNCHANGED
+ };
+
+ #ifdef __cplusplus
+ } /* extern "C" */
+ #endif
+
+ #endif /* SHADER_ENUMS_H */
--- /dev/null
- #include "glsl/nir/nir.h"
+/*
+ * Copyright © 2015-2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_compiler.h"
+#include "brw_context.h"
++#include "nir.h"
+#include "main/errors.h"
+#include "util/debug.h"
+
+static void
+shader_debug_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+ va_list args;
+
+ va_start(args, fmt);
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
+ va_end(args);
+}
+
+static void
+shader_perf_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+
+ va_list args;
+ va_start(args, fmt);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ va_list args_copy;
+ va_copy(args_copy, args);
+ vfprintf(stderr, fmt, args_copy);
+ va_end(args_copy);
+ }
+
+ if (brw->perf_debug) {
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_PERFORMANCE,
+ MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
+ }
+ va_end(args);
+}
+
+#define COMMON_OPTIONS \
+ /* In order to help allow for better CSE at the NIR level we tell NIR to \
+ * split all ffma instructions during opt_algebraic and we then re-combine \
+ * them as a later step. \
+ */ \
+ .lower_ffma = true, \
+ .lower_sub = true, \
+ .lower_fdiv = true, \
+ .lower_scmp = true, \
+ .lower_fmod = true, \
+ .lower_bitfield_extract = true, \
+ .lower_bitfield_insert = true, \
+ .lower_uadd_carry = true, \
+ .lower_usub_borrow = true, \
+ .native_integers = true, \
+ .vertex_id_zero_based = true
+
+static const struct nir_shader_compiler_options scalar_nir_options = {
+ COMMON_OPTIONS,
+ .lower_pack_half_2x16 = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_unpack_half_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+};
+
+static const struct nir_shader_compiler_options vector_nir_options = {
+ COMMON_OPTIONS,
+
+ /* In the vec4 backend, our dpN instruction replicates its result to all the
+ * components of a vec4. We would like NIR to give us replicated fdot
+ * instructions because it can optimize better for us.
+ */
+ .fdot_replicates = true,
+
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+};
+
+struct brw_compiler *
+brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
+{
+ struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
+
+ compiler->devinfo = devinfo;
+ compiler->shader_debug_log = shader_debug_log_mesa;
+ compiler->shader_perf_log = shader_perf_log_mesa;
+
+ brw_fs_alloc_reg_sets(compiler);
+ brw_vec4_alloc_reg_set(compiler);
+
+ compiler->scalar_stage[MESA_SHADER_VERTEX] =
+ devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
+ compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
+ compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
+ devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
+ compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
+ devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true);
+ compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
+ compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
+
+ /* We want the GLSL compiler to emit code that uses condition codes */
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
+ compiler->glsl_compiler_options[i].MaxIfDepth =
+ devinfo->gen < 6 ? 16 : UINT_MAX;
+
+ compiler->glsl_compiler_options[i].EmitCondCodes = true;
+ compiler->glsl_compiler_options[i].EmitNoNoise = true;
+ compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
+ compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
+ compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
+ compiler->glsl_compiler_options[i].LowerClipDistance = true;
+
+ bool is_scalar = compiler->scalar_stage[i];
+
+ compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+ compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+ compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
+
+ /* !ARB_gpu_shader5 */
+ if (devinfo->gen < 7)
+ compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
+
+ compiler->glsl_compiler_options[i].NirOptions =
+ is_scalar ? &scalar_nir_options : &vector_nir_options;
+
+ compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
+ }
+
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
+
+ if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
+ compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
+
+ compiler->glsl_compiler_options[MESA_SHADER_COMPUTE]
+ .LowerShaderSharedVariables = true;
+
+ return compiler;
+}
--- /dev/null
- -I$(top_srcdir)/src/glsl/nir \
+# Copyright © 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+SUBDIRS = . tests
+
+vulkan_includedir = $(includedir)/vulkan
+
+vulkan_include_HEADERS = \
+ $(top_srcdir)/include/vulkan/vk_platform.h \
+ $(top_srcdir)/include/vulkan/vulkan.h \
+ $(top_srcdir)/include/vulkan/vulkan_intel.h
+
+# Used when generating entrypoints to filter out unwanted extensions
+VULKAN_ENTRYPOINT_CPPFLAGS = \
+ -I$(top_srcdir)/include/vulkan \
+ -DVK_USE_PLATFORM_XCB_KHR \
+ -DVK_USE_PLATFORM_WAYLAND_KHR
+
+lib_LTLIBRARIES = libvulkan.la
+
+check_LTLIBRARIES = libvulkan-test.la
+
+PER_GEN_LIBS = \
+ libanv-gen7.la \
+ libanv-gen75.la \
+ libanv-gen8.la \
+ libanv-gen9.la
+
+noinst_LTLIBRARIES = $(PER_GEN_LIBS)
+
+# The gallium includes are for the util/u_math.h include from main/macros.h
+
+AM_CPPFLAGS = \
+ $(INTEL_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $(DEFINES) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
- -I$(top_builddir)/src/glsl/nir \
++ -I$(top_srcdir)/src/compiler \
+ -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa \
+ -I$(top_srcdir)/src/mesa/drivers/dri/common \
+ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/isl/ \
+ -I$(top_builddir)/src \
++ -I$(top_builddir)/src/compiler \
+ -I$(top_builddir)/src/vulkan
+
+libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init
+
+VULKAN_SOURCES = \
+ anv_allocator.c \
+ anv_cmd_buffer.c \
+ anv_batch_chain.c \
+ anv_descriptor_set.c \
+ anv_device.c \
+ anv_dump.c \
+ anv_entrypoints.c \
+ anv_entrypoints.h \
+ anv_formats.c \
+ anv_image.c \
+ anv_intel.c \
+ anv_meta.c \
+ anv_meta_clear.c \
+ anv_meta_resolve.c \
+ anv_nir_apply_dynamic_offsets.c \
+ anv_nir_apply_pipeline_layout.c \
+ anv_nir_lower_push_constants.c \
+ anv_pass.c \
+ anv_pipeline.c \
+ anv_private.h \
+ anv_query.c \
+ anv_util.c \
+ anv_wsi.c \
+ anv_wsi_x11.c
+
+BUILT_SOURCES = \
+ anv_entrypoints.h \
+ anv_entrypoints.c
+
+libanv_gen7_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen7_cmd_buffer.c \
+ gen7_pipeline.c \
+ gen7_state.c
+libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70
+
+libanv_gen75_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen7_cmd_buffer.c \
+ gen7_pipeline.c \
+ gen7_state.c
+libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75
+
+libanv_gen8_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen8_cmd_buffer.c \
+ gen8_pipeline.c \
+ gen8_state.c
+libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80
+
+libanv_gen9_la_SOURCES = \
+ genX_cmd_buffer.c \
+ genX_pipeline.c \
+ gen8_cmd_buffer.c \
+ gen8_pipeline.c \
+ gen8_state.c
+libanv_gen9_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=90
+
+if HAVE_EGL_PLATFORM_WAYLAND
+BUILT_SOURCES += \
+ wayland-drm-protocol.c \
+ wayland-drm-client-protocol.h
+
+%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
+ $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
+
+%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
+ $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
+
+AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm
+VULKAN_SOURCES += \
+ wayland-drm-protocol.c \
+ anv_wsi_wayland.c
+libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM
+endif
+
+libvulkan_la_SOURCES = \
+ $(VULKAN_SOURCES) \
+ anv_gem.c
+
+anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@
+
+anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@
+
+CLEANFILES = $(BUILT_SOURCES)
+
+libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \
+ $(top_builddir)/src/isl/libisl.la \
+ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
+ ../mesa/libmesa.la \
+ ../mesa/drivers/dri/common/libdri_test_stubs.la \
+ -lpthread -ldl -lstdc++ \
+ $(PER_GEN_LIBS)
+
+# Libvulkan with dummy gem. Used for unit tests.
+
+libvulkan_test_la_SOURCES = \
+ $(VULKAN_SOURCES) \
+ anv_gem_stubs.c
+
+libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS)
+libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD)
+
+include $(top_srcdir)/install-lib-links.mk
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_meta.h"
+#include "anv_private.h"
++#include "nir/nir_builder.h"
+
+struct anv_render_pass anv_meta_dummy_renderpass = {0};
+
+static nir_shader *
+build_nir_vertex_shader(bool attr_flat)
+{
+ nir_builder b;
+
+ const struct glsl_type *vertex_type = glsl_vec4_type();
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vertex_type, "a_pos");
+ pos_in->data.location = VERT_ATTRIB_GENERIC0;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vertex_type, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+ nir_copy_var(&b, pos_out, pos_in);
+
+ /* Add one more pass-through attribute. For clear shaders, this is used
+ * to store the color and for blit shaders it's the texture coordinate.
+ */
+ const struct glsl_type *attr_type = glsl_vec4_type();
+ nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in,
+ attr_type, "a_attr");
+ attr_in->data.location = VERT_ATTRIB_GENERIC1;
+ nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out,
+ attr_type, "v_attr");
+ attr_out->data.location = VARYING_SLOT_VAR0;
+ attr_out->data.interpolation = attr_flat ? INTERP_QUALIFIER_FLAT :
+ INTERP_QUALIFIER_SMOOTH;
+ nir_copy_var(&b, attr_out, attr_in);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
+{
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
+
+ const struct glsl_type *color_type = glsl_vec4_type();
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_vec4_type(), "v_attr");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
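+ /* For example, for a 1D array texture the layer index arrives in v_attr.z,
+ * so the swizzle below selects { x, z } and the two-component coordinate
+ * becomes (x, layer); for 2D and 3D sources the identity { x, y, z } is
+ * kept.
+ */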
+ unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
+ (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+ glsl_get_base_type(color_type));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->dest_type = nir_type_float; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+ tex->sampler = nir_deref_var_create(tex, sampler);
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ color_type, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+
+ return b.shader;
+}
+
+void
+anv_meta_save(struct anv_meta_saved_state *state,
+ const struct anv_cmd_buffer *cmd_buffer,
+ uint32_t dynamic_mask)
+{
+ state->old_pipeline = cmd_buffer->state.pipeline;
+ state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
+ memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
+ sizeof(state->old_vertex_bindings));
+
+ state->dynamic_mask = dynamic_mask;
+ anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
+ dynamic_mask);
+}
+
+void
+anv_meta_restore(const struct anv_meta_saved_state *state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ cmd_buffer->state.pipeline = state->old_pipeline;
+ cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+ memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
+ sizeof(state->old_vertex_bindings));
+
+ cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1;
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+
+ anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
+ state->dynamic_mask);
+ cmd_buffer->state.dirty |= state->dynamic_mask;
+
+ /* Since we've used the pipeline with the VS disabled, set
+ * need_query_wa. See CmdBeginQuery.
+ */
+ cmd_buffer->state.need_query_wa = true;
+}
+
+VkImageViewType
+anv_meta_get_view_type(const struct anv_image *image)
+{
+ switch (image->type) {
+ case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
+ case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
+ case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
+ default:
+ unreachable("bad VkImageViewType");
+ }
+}
+
+/**
+ * When creating a destination VkImageView, this function provides the needed
+ * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
+ */
+uint32_t
+anv_meta_get_iview_layer(const struct anv_image *dest_image,
+ const VkImageSubresourceLayers *dest_subresource,
+ const VkOffset3D *dest_offset)
+{
+ switch (dest_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ return dest_subresource->baseArrayLayer;
+ case VK_IMAGE_TYPE_3D:
+ /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
+ * but meta does it anyway. When doing so, we translate the
+ * destination's z offset into an array offset.
+ */
+ return dest_offset->z;
+ default:
+ assert(!"bad VkImageType");
+ return 0;
+ }
+}
+
+static VkResult
+anv_device_init_meta_blit_state(struct anv_device *device)
+{
+ VkResult result;
+
+ result = anv_CreateRenderPass(anv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* We don't use a vertex shader for clearing, but instead build and pass
+ * the VUEs directly to the rasterization backend. However, we do need
+ * to provide GLSL source for the vertex shader so that the compiler
+ * does not dead-code our inputs.
+ */
+ struct anv_shader_module vs = {
+ .nir = build_nir_vertex_shader(false),
+ };
+
+ struct anv_shader_module fs_1d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
+ };
+
+ struct anv_shader_module fs_2d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
+ };
+
+ struct anv_shader_module fs_3d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
+ };
+
+ VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 2,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = 0,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ {
+ .binding = 1,
+ .stride = 5 * sizeof(float),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = 0
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = 0
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32B32_SFLOAT,
+ .offset = 8
+ }
+ }
+ };
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL
+ },
+ }
+ };
+ result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+ &ds_layout_info,
+ &device->meta_state.alloc,
+ &device->meta_state.blit.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail_render_pass;
+
+ result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
+ if (result != VK_SUCCESS)
+ goto fail_descriptor_set_layout;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = anv_shader_module_to_handle(&vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ }, {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ },
+ };
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = &vi_create_info,
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ { .colorWriteMask =
+ VK_COLOR_COMPONENT_A_BIT |
+ VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT },
+ }
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit.pipeline_layout,
+ .renderPass = device->meta_state.blit.render_pass,
+ .subpass = 0,
+ };
+
+ const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ };
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_layout;
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_1d;
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_2d;
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_1d.nir);
+ ralloc_free(fs_2d.nir);
+ ralloc_free(fs_3d.nir);
+
+ return VK_SUCCESS;
+
+ fail_pipeline_2d:
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_2d_src,
+ &device->meta_state.alloc);
+
+ fail_pipeline_1d:
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_1d_src,
+ &device->meta_state.alloc);
+
+ fail_pipeline_layout:
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_layout,
+ &device->meta_state.alloc);
+ fail_descriptor_set_layout:
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit.ds_layout,
+ &device->meta_state.alloc);
+ fail_render_pass:
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit.render_pass,
+ &device->meta_state.alloc);
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_1d.nir);
+ ralloc_free(fs_2d.nir);
+ ralloc_free(fs_3d.nir);
+ fail:
+ return result;
+}
+
+static void
+meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_saved_state *saved_state)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT));
+}
+
+struct blit_region {
+ VkOffset3D src_offset;
+ VkExtent3D src_extent;
+ VkOffset3D dest_offset;
+ VkExtent3D dest_extent;
+};
+
+/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
+ * elements rather than texels. One element equals one texel if the image is
+ * uncompressed, or one block if it is compressed.
+ */
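+/* For example (hypothetical values): with a compressed format whose blocks
+ * are 4x4x1 texels (bw = 4, bh = 4, bd = 1), an imageOffset of (8, 12, 0)
+ * texels maps to (2, 3, 0) elements; for uncompressed formats bw = bh = bd = 1
+ * and the offset is unchanged.
+ */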
+static struct VkOffset3D
+meta_region_offset_el(const struct anv_image * image,
+ const struct VkOffset3D * offset)
+{
+ const struct isl_format_layout * isl_layout = image->format->isl_layout;
+ return (VkOffset3D) {
+ .x = offset->x / isl_layout->bw,
+ .y = offset->y / isl_layout->bh,
+ .z = offset->z / isl_layout->bd,
+ };
+}
+
+/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
+ * elements rather than texels. One element equals one texel if the image is
+ * uncompressed, or one block if it is compressed.
+ */
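+/* For example (hypothetical values): an imageExtent of (10, 10, 1) texels in
+ * a format with 4x4x1 blocks rounds up to (3, 3, 1) elements.
+ */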
+static struct VkExtent3D
+meta_region_extent_el(const VkFormat format,
+ const struct VkExtent3D * extent)
+{
+ const struct isl_format_layout * isl_layout =
+ anv_format_for_vk_format(format)->isl_layout;
+ return (VkExtent3D) {
+ .width = DIV_ROUND_UP(extent->width , isl_layout->bw),
+ .height = DIV_ROUND_UP(extent->height, isl_layout->bh),
+ .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd),
+ };
+}
+
+static void
+meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image *src_image,
+ struct anv_image_view *src_iview,
+ VkOffset3D src_offset,
+ VkExtent3D src_extent,
+ struct anv_image *dest_image,
+ struct anv_image_view *dest_iview,
+ VkOffset3D dest_offset,
+ VkExtent3D dest_extent,
+ VkFilter blit_filter)
+{
+ struct anv_device *device = cmd_buffer->device;
+ VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1;
+
+ struct blit_vb_data {
+ float pos[2];
+ float tex_coord[3];
+ } *vb_data;
+
+ assert(src_image->samples == dest_image->samples);
+
+ unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+ struct anv_state vb_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+ vb_data[0] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x + dest_extent.width,
+ dest_offset.y + dest_extent.height,
+ },
+ .tex_coord = {
+ (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ vb_data[1] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y + dest_extent.height,
+ },
+ .tex_coord = {
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ vb_data[2] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y,
+ },
+ .tex_coord = {
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)src_offset.y / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ anv_state_clflush(vb_state);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = vb_size,
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = vb_state.offset,
+ };
+
+ anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ (VkBuffer[]) {
+ anv_buffer_to_handle(&vertex_buffer),
+ anv_buffer_to_handle(&vertex_buffer)
+ },
+ (VkDeviceSize[]) {
+ 0,
+ sizeof(struct anv_vue_header),
+ });
+
+ VkSampler sampler;
+ ANV_CALL(CreateSampler)(anv_device_to_handle(device),
+ &(VkSamplerCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = blit_filter,
+ .minFilter = blit_filter,
+ }, &cmd_buffer->pool->alloc, &sampler);
+
+ VkDescriptorSet set;
+ anv_AllocateDescriptorSets(anv_device_to_handle(device),
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = dummy_desc_pool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout
+ }, &set);
+ anv_UpdateDescriptorSets(anv_device_to_handle(device),
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
+ VkFramebuffer fb;
+ anv_CreateFramebuffer(anv_device_to_handle(device),
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(dest_iview),
+ },
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .layers = 1
+ }, &cmd_buffer->pool->alloc, &fb);
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.render_pass,
+ .framebuffer = fb,
+ .renderArea = {
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { dest_extent.width, dest_extent.height },
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ }, VK_SUBPASS_CONTENTS_INLINE);
+
+ VkPipeline pipeline;
+
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = device->meta_state.blit.pipeline_1d_src;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = device->meta_state.blit.pipeline_2d_src;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = device->meta_state.blit.pipeline_3d_src;
+ break;
+ default:
+ unreachable(!"bad VkImageType");
+ }
+
+ if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
+ anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ }
+
+ anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport) {
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ });
+
+ anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit.pipeline_layout, 0, 1,
+ &set, 0, NULL);
+
+ ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set));
+ anv_DestroySampler(anv_device_to_handle(device), sampler,
+ &cmd_buffer->pool->alloc);
+ anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+ &cmd_buffer->pool->alloc);
+}
+
+static void
+meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_meta_saved_state *saved_state)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static VkFormat
+vk_format_for_size(int bs)
+{
+ /* Note: We intentionally use the 4-channel formats whenever we can.
+ * This is so that, when we do an RGB <-> RGBX copy, the two formats will
+ * line up even though one of them is 3/4 the size of the other.
+ */
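+ /* For example, an 8-byte block maps to VK_FORMAT_R16G16B16A16_UINT rather
+ * than a two-channel 32-bit format, so its per-channel layout lines up with
+ * the 6-byte VK_FORMAT_R16G16B16_UINT used for the RGB side of such a copy.
+ */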
+ switch (bs) {
+ case 1: return VK_FORMAT_R8_UINT;
+ case 2: return VK_FORMAT_R8G8_UINT;
+ case 3: return VK_FORMAT_R8G8B8_UINT;
+ case 4: return VK_FORMAT_R8G8B8A8_UINT;
+ case 6: return VK_FORMAT_R16G16B16_UINT;
+ case 8: return VK_FORMAT_R16G16B16A16_UINT;
+ case 12: return VK_FORMAT_R32G32B32_UINT;
+ case 16: return VK_FORMAT_R32G32B32A32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
+}
+
+static void
+do_buffer_copy(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_bo *src, uint64_t src_offset,
+ struct anv_bo *dest, uint64_t dest_offset,
+ int width, int height, VkFormat copy_format)
+{
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = copy_format,
+ .extent = {
+ .width = width,
+ .height = height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = 0,
+ .flags = 0,
+ };
+
+ VkImage src_image;
+ image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
+ anv_CreateImage(vk_device, &image_info,
+ &cmd_buffer->pool->alloc, &src_image);
+
+ VkImage dest_image;
+ image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ anv_CreateImage(vk_device, &image_info,
+ &cmd_buffer->pool->alloc, &dest_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ anv_image_from_handle(src_image)->bo = src;
+ anv_image_from_handle(src_image)->offset = src_offset;
+ anv_image_from_handle(dest_image)->bo = dest;
+ anv_image_from_handle(dest_image)->offset = dest_offset;
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = src_image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = copy_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = dest_image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = copy_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ anv_image_from_handle(src_image),
+ &src_iview,
+ (VkOffset3D) { 0, 0, 0 },
+ (VkExtent3D) { width, height, 1 },
+ anv_image_from_handle(dest_image),
+ &dest_iview,
+ (VkOffset3D) { 0, 0, 0 },
+ (VkExtent3D) { width, height, 1 },
+ VK_FILTER_NEAREST);
+
+ anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc);
+ anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc);
+}
+
+void anv_CmdCopyBuffer(
+ VkCommandBuffer commandBuffer,
+ VkBuffer srcBuffer,
+ VkBuffer destBuffer,
+ uint32_t regionCount,
+ const VkBufferCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
+ ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer);
+
+ struct anv_meta_saved_state saved_state;
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
+ uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
+ uint64_t copy_size = pRegions[r].size;
+
+ /* First, we compute the biggest format that can be used with the
+ * given offsets and size.
+ */
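+ /* For example (hypothetical values): src_offset = 48, dest_offset = 8 and
+ * size = 24 give per-term alignments of 16, 8 and 8 bytes, so bs ends up
+ * as 8 and vk_format_for_size() picks VK_FORMAT_R16G16B16A16_UINT.
+ */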
+ int bs = 16;
+
+ int fs = ffs(src_offset) - 1;
+ if (fs != -1)
+ bs = MIN2(bs, 1 << fs);
+ assert(src_offset % bs == 0);
+
+ fs = ffs(dest_offset) - 1;
+ if (fs != -1)
+ bs = MIN2(bs, 1 << fs);
+ assert(dest_offset % bs == 0);
+
+ fs = ffs(pRegions[r].size) - 1;
+ if (fs != -1)
+ bs = MIN2(bs, 1 << fs);
+ assert(pRegions[r].size % bs == 0);
+
+ VkFormat copy_format = vk_format_for_size(bs);
+
+ /* This is the maximum possible width/height our HW can handle */
+ uint64_t max_surface_dim = 1 << 14;
+
+ /* First, we make a bunch of max-sized copies */
+ uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs;
+ while (copy_size >= max_copy_size) {
+ do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+ dest_buffer->bo, dest_offset,
+ max_surface_dim, max_surface_dim, copy_format);
+ copy_size -= max_copy_size;
+ src_offset += max_copy_size;
+ dest_offset += max_copy_size;
+ }
+
+ uint64_t height = copy_size / (max_surface_dim * bs);
+ assert(height < max_surface_dim);
+ if (height != 0) {
+ uint64_t rect_copy_size = height * max_surface_dim * bs;
+ do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+ dest_buffer->bo, dest_offset,
+ max_surface_dim, height, copy_format);
+ copy_size -= rect_copy_size;
+ src_offset += rect_copy_size;
+ dest_offset += rect_copy_size;
+ }
+
+ if (copy_size != 0) {
+ do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+ dest_buffer->bo, dest_offset,
+ copy_size / bs, 1, copy_format);
+ }
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdUpdateBuffer(
+ VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize dataSize,
+ const uint32_t* pData)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
+ struct anv_meta_saved_state saved_state;
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ /* We can't quite grab a full block because the state stream needs a
+ * little data at the top to build its linked list.
+ */
+ const uint32_t max_update_size =
+ cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
+
+ assert(max_update_size < (1 << 14) * 4);
+
+ while (dataSize) {
+ const uint32_t copy_size = MIN2(dataSize, max_update_size);
+
+ struct anv_state tmp_data =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
+
+ memcpy(tmp_data.map, pData, copy_size);
+
+ VkFormat format;
+ int bs;
+ if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) {
+ format = VK_FORMAT_R32G32B32A32_UINT;
+ bs = 16;
+ } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) {
+ format = VK_FORMAT_R32G32_UINT;
+ bs = 8;
+ } else {
+ assert((copy_size & 3) == 0 && (dstOffset & 3) == 0);
+ format = VK_FORMAT_R32_UINT;
+ bs = 4;
+ }
+
+ do_buffer_copy(cmd_buffer,
+ &cmd_buffer->device->dynamic_state_block_pool.bo,
+ tmp_data.offset,
+ dst_buffer->bo, dst_buffer->offset + dstOffset,
+ copy_size / bs, 1, format);
+
+ dataSize -= copy_size;
+ dstOffset += copy_size;
+ pData = (void *)pData + copy_size;
+ }
+}
+
+static VkFormat
+choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect)
+{
+ assert(__builtin_popcount(aspect) == 1);
+
+ struct isl_surf *surf =
+ &anv_image_get_surface_for_aspect_mask(image, aspect)->isl;
+
+ /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT
+ * formats for the source and destination image views.
+ *
+ * From the Vulkan spec (2015-12-30):
+ *
+ * vkCmdCopyImage performs image copies in a similar manner to a host
+ * memcpy. It does not perform general-purpose conversions such as
+ * scaling, resizing, blending, color-space conversion, or format
+ * conversions. Rather, it simply copies raw image data. vkCmdCopyImage
+ * can copy between images with different formats, provided the formats
+ * are compatible as defined below.
+ *
+ * [The spec later defines compatibility as having the same number of
+ * bytes per block].
+ */
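+ /* For example, a VK_FORMAT_R8G8B8A8_SRGB image has a 4-byte block, so both
+ * the source and destination views use VK_FORMAT_R8G8B8A8_UINT and the copy
+ * moves raw texel bytes without any sRGB conversion.
+ */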
+ return vk_format_for_size(isl_format_layouts[surf->format].bs);
+}
+
+static VkFormat
+choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect)
+{
+ assert(__builtin_popcount(aspect) == 1);
+
+ /* vkCmdCopy* commands behave like memcpy. Therefore we choose
+ * compatible UINT formats for the source and destination image views.
+ *
+ * For the buffer, we go back to the original image format and get the
+ * format as if it were linear. This way, for RGB formats, we get
+ * an RGB format here even if the tiled image is RGBA. XXX: This doesn't
+ * work if the buffer is the destination.
+ */
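+ /* For example, a VK_FORMAT_R8G8B8_UNORM image whose tiled surface may have
+ * been promoted to an RGBA layout still yields a 3-byte linear format here,
+ * so the buffer side is viewed as VK_FORMAT_R8G8B8_UINT.
+ */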
+ enum isl_format linear_format = anv_get_isl_format(format, aspect,
+ VK_IMAGE_TILING_LINEAR,
+ NULL);
+
+ return vk_format_for_size(isl_format_layouts[linear_format].bs);
+}
+
+void anv_CmdCopyImage(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkImageCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ struct anv_meta_saved_state saved_state;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdCopyImage can be used to copy image data between multisample
+ * images, but both images must have the same number of samples.
+ */
+ assert(src_image->samples == dest_image->samples);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ assert(pRegions[r].srcSubresource.aspectMask ==
+ pRegions[r].dstSubresource.aspectMask);
+
+ VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask;
+
+ VkFormat src_format = choose_iview_format(src_image, aspect);
+ VkFormat dst_format = choose_iview_format(dest_image, aspect);
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+ .layerCount = pRegions[r].dstSubresource.layerCount,
+ },
+ },
+ cmd_buffer, 0);
+
+ const VkOffset3D dest_offset = {
+ .x = pRegions[r].dstOffset.x,
+ .y = pRegions[r].dstOffset.y,
+ .z = 0,
+ };
+
+ unsigned num_slices;
+ if (src_image->type == VK_IMAGE_TYPE_3D) {
+ assert(pRegions[r].srcSubresource.layerCount == 1 &&
+ pRegions[r].dstSubresource.layerCount == 1);
+ num_slices = pRegions[r].extent.depth;
+ } else {
+ assert(pRegions[r].srcSubresource.layerCount ==
+ pRegions[r].dstSubresource.layerCount);
+ assert(pRegions[r].extent.depth == 1);
+ num_slices = pRegions[r].dstSubresource.layerCount;
+ }
+
+ const uint32_t dest_base_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
+ &pRegions[r].dstOffset);
+
+ for (unsigned slice = 0; slice < num_slices; slice++) {
+ VkOffset3D src_offset = pRegions[r].srcOffset;
+ src_offset.z += slice;
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = destImage,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dst_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_array_slice + slice,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ src_image, &src_iview,
+ src_offset,
+ pRegions[r].extent,
+ dest_image, &dest_iview,
+ dest_offset,
+ pRegions[r].extent,
+ VK_FILTER_NEAREST);
+ }
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdBlitImage(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkImageBlit* pRegions,
+ VkFilter filter)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ struct anv_meta_saved_state saved_state;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdBlitImage must not be used for multisampled source or
+ * destination images. Use vkCmdResolveImage for this purpose.
+ */
+ assert(src_image->samples == 1);
+ assert(dest_image->samples == 1);
+
+ anv_finishme("respect VkFilter");
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = pRegions[r].srcSubresource.aspectMask,
+ .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ const VkOffset3D dest_offset = {
+ .x = pRegions[r].dstOffsets[0].x,
+ .y = pRegions[r].dstOffsets[0].y,
+ .z = 0,
+ };
+
+ if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
+ pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
+ pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
+ pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
+ anv_finishme("FINISHME: Allow flipping in blits");
+
+ const VkExtent3D dest_extent = {
+ .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
+ .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
+ };
+
+ const VkExtent3D src_extent = {
+ .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
+ .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
+ };
+
+ const uint32_t dest_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
+ &pRegions[r].dstOffsets[0]);
+
+ if (pRegions[r].srcSubresource.layerCount > 1)
+ anv_finishme("FINISHME: copy multiple array layers");
+
+ if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
+ pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
+ anv_finishme("FINISHME: copy multiple depth layers");
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = destImage,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dest_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_array_slice,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ src_image, &src_iview,
+ pRegions[r].srcOffsets[0], src_extent,
+ dest_image, &dest_iview,
+ dest_offset, dest_extent,
+ filter);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+static struct anv_image *
+make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format,
+ VkImageUsageFlags usage,
+ VkImageType image_type,
+ const VkAllocationCallbacks *alloc,
+ const VkBufferImageCopy *copy)
+{
+ ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer);
+
+ VkExtent3D extent = copy->imageExtent;
+ if (copy->bufferRowLength)
+ extent.width = copy->bufferRowLength;
+ if (copy->bufferImageHeight)
+ extent.height = copy->bufferImageHeight;
+ extent.depth = 1;
+ extent = meta_region_extent_el(format, &extent);
+
+ VkImageAspectFlags aspect = copy->imageSubresource.aspectMask;
+ VkFormat buffer_format = choose_buffer_format(format, aspect);
+
+ VkImage vk_image;
+ VkResult result = anv_CreateImage(vk_device,
+ &(VkImageCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = buffer_format,
+ .extent = extent,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = usage,
+ .flags = 0,
+ }, alloc, &vk_image);
+ assert(result == VK_SUCCESS);
+
+ ANV_FROM_HANDLE(anv_image, image, vk_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ image->bo = buffer->bo;
+ image->offset = buffer->offset + copy->bufferOffset;
+
+ return image;
+}
+
+void anv_CmdCopyBufferToImage(
+ VkCommandBuffer commandBuffer,
+ VkBuffer srcBuffer,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkBufferImageCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+ struct anv_meta_saved_state saved_state;
+
+ /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
+ * VK_SAMPLE_COUNT_1_BIT."
+ */
+ assert(dest_image->samples == 1);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
+
+ VkFormat image_format = choose_iview_format(dest_image, aspect);
+
+ struct anv_image *src_image =
+ make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format,
+ VK_IMAGE_USAGE_SAMPLED_BIT,
+ dest_image->type, &cmd_buffer->pool->alloc,
+ &pRegions[r]);
+
+ const uint32_t dest_base_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource,
+ &pRegions[r].imageOffset);
+
+ unsigned num_slices_3d = pRegions[r].imageExtent.depth;
+ unsigned num_slices_array = pRegions[r].imageSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(src_image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = src_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ uint32_t img_x = 0;
+ uint32_t img_y = 0;
+ uint32_t img_o = 0;
+ if (isl_format_is_compressed(dest_image->format->surface_format))
+ isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev,
+ &dest_image->color_surface.isl,
+ pRegions[r].imageSubresource.mipLevel,
+ pRegions[r].imageSubresource.baseArrayLayer + slice_array,
+ pRegions[r].imageOffset.z + slice_3d,
+ &img_o, &img_x, &img_y);
+
+ VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].imageOffset);
+ dest_offset_el.x += img_x;
+ dest_offset_el.y += img_y;
+ dest_offset_el.z = 0;
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(dest_image),
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = image_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_array_slice +
+ slice_array + slice_3d,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, img_o);
+
+ const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format,
+ &pRegions[r].imageExtent);
+
+ meta_emit_blit(cmd_buffer,
+ src_image,
+ &src_iview,
+ (VkOffset3D){0, 0, 0},
+ img_extent_el,
+ dest_image,
+ &dest_iview,
+ dest_offset_el,
+ img_extent_el,
+ VK_FILTER_NEAREST);
+
+ /* Once we've done the blit, all of the actual information about
+ * the image is embedded in the command buffer so we can just
+ * increment the offset directly in the image effectively
+ * re-binding it to different backing memory.
+ */
+ src_image->offset += src_image->extent.width *
+ src_image->extent.height *
+ src_image->format->isl_layout->bs;
+
+ if (dest_image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+
+ anv_DestroyImage(vk_device, anv_image_to_handle(src_image),
+ &cmd_buffer->pool->alloc);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdCopyImageToBuffer(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkBuffer destBuffer,
+ uint32_t regionCount,
+ const VkBufferImageCopy* pRegions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+ struct anv_meta_saved_state saved_state;
+
+ /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to
+ * VK_SAMPLE_COUNT_1_BIT."
+ */
+ assert(src_image->samples == 1);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
+
+ VkFormat image_format = choose_iview_format(src_image, aspect);
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = image_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer,
+ .layerCount = pRegions[r].imageSubresource.layerCount,
+ },
+ },
+ cmd_buffer, 0);
+
+ struct anv_image *dest_image =
+ make_image_for_buffer(vk_device, destBuffer, src_image->vk_format,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ src_image->type, &cmd_buffer->pool->alloc,
+ &pRegions[r]);
+
+ unsigned num_slices;
+ if (src_image->type == VK_IMAGE_TYPE_3D) {
+ assert(pRegions[r].imageSubresource.layerCount == 1);
+ num_slices = pRegions[r].imageExtent.depth;
+ } else {
+ assert(pRegions[r].imageExtent.depth == 1);
+ num_slices = pRegions[r].imageSubresource.layerCount;
+ }
+
+ for (unsigned slice = 0; slice < num_slices; slice++) {
+ VkOffset3D src_offset = pRegions[r].imageOffset;
+ src_offset.z += slice;
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(dest_image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = dest_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ meta_emit_blit(cmd_buffer,
+ anv_image_from_handle(srcImage),
+ &src_iview,
+ src_offset,
+ pRegions[r].imageExtent,
+ dest_image,
+ &dest_iview,
+ (VkOffset3D) { 0, 0, 0 },
+ pRegions[r].imageExtent,
+ VK_FILTER_NEAREST);
+
+ /* Once we've done the blit, all of the actual information about
+ * the image is embedded in the command buffer so we can just
+ * increment the offset directly in the image effectively
+ * re-binding it to different backing memory.
+ */
+ dest_image->offset += dest_image->extent.width *
+ dest_image->extent.height *
+ src_image->format->isl_layout->bs;
+ }
+
+ anv_DestroyImage(vk_device, anv_image_to_handle(dest_image),
+ &cmd_buffer->pool->alloc);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+static void *
+meta_alloc(void* _device, size_t size, size_t alignment,
+ VkSystemAllocationScope allocationScope)
+{
+ struct anv_device *device = _device;
+ return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+}
+
+static void *
+meta_realloc(void* _device, void *original, size_t size, size_t alignment,
+ VkSystemAllocationScope allocationScope)
+{
+ struct anv_device *device = _device;
+ return device->alloc.pfnReallocation(device->alloc.pUserData, original,
+ size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+}
+
+static void
+meta_free(void* _device, void *data)
+{
+ struct anv_device *device = _device;
+ return device->alloc.pfnFree(device->alloc.pUserData, data);
+}
+
+VkResult
+anv_device_init_meta(struct anv_device *device)
+{
+ VkResult result;
+
+ device->meta_state.alloc = (VkAllocationCallbacks) {
+ .pUserData = device,
+ .pfnAllocation = meta_alloc,
+ .pfnReallocation = meta_realloc,
+ .pfnFree = meta_free,
+ };
+
+ result = anv_device_init_meta_clear_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_clear;
+
+ result = anv_device_init_meta_resolve_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_resolve;
+
+ result = anv_device_init_meta_blit_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_blit;
+
+ return VK_SUCCESS;
+
+fail_blit:
+ anv_device_finish_meta_resolve_state(device);
+fail_resolve:
+ anv_device_finish_meta_clear_state(device);
+fail_clear:
+ return result;
+}
+
+void
+anv_device_finish_meta(struct anv_device *device)
+{
+ anv_device_finish_meta_resolve_state(device);
+ anv_device_finish_meta_clear_state(device);
+
+ /* Blit */
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit.render_pass,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_1d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_2d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_3d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_layout,
+ &device->meta_state.alloc);
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit.ds_layout,
+ &device->meta_state.alloc);
+}
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_meta.h"
+#include "anv_private.h"
++#include "nir/nir_builder.h"
+
+/** Vertex attributes for color clears. */
+struct color_clear_vattrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+ VkClearColorValue color;
+};
+
+/** Vertex attributes for depthstencil clears. */
+struct depthstencil_clear_vattrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+};
+
+static void
+meta_clear_begin(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT) |
+ (1 << VK_DYNAMIC_STATE_SCISSOR) |
+ (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE));
+
+ cmd_buffer->state.dynamic.viewport.count = 0;
+ cmd_buffer->state.dynamic.scissor.count = 0;
+}
+
+static void
+meta_clear_end(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static void
+build_color_shaders(struct nir_shader **out_vs,
+ struct nir_shader **out_fs,
+ uint32_t frag_output)
+{
+ nir_builder vs_b;
+ nir_builder fs_b;
+
+ nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
+ nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
+
+ vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
+
+ const struct glsl_type *position_type = glsl_vec4_type();
+ const struct glsl_type *color_type = glsl_vec4_type();
+
+ nir_variable *vs_in_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
+ "a_position");
+ vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
+
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
+ "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
+
+ nir_variable *vs_in_color =
+ nir_variable_create(vs_b.shader, nir_var_shader_in, color_type,
+ "a_color");
+ vs_in_color->data.location = VERT_ATTRIB_GENERIC1;
+
+ nir_variable *vs_out_color =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, color_type,
+ "v_color");
+ vs_out_color->data.location = VARYING_SLOT_VAR0;
+ vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT;
+
+ nir_variable *fs_in_color =
+ nir_variable_create(fs_b.shader, nir_var_shader_in, color_type,
+ "v_color");
+ fs_in_color->data.location = vs_out_color->data.location;
+ fs_in_color->data.interpolation = vs_out_color->data.interpolation;
+
+ nir_variable *fs_out_color =
+ nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
+ "f_color");
+ fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
+
+ nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
+ nir_copy_var(&vs_b, vs_out_color, vs_in_color);
+ nir_copy_var(&fs_b, fs_out_color, fs_in_color);
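+
+ /* The generated pair is roughly equivalent to a pass-through GLSL
+ * program: the VS copies a_position to gl_Position and a_color to the
+ * flat varying v_color, and the FS writes v_color to the color output
+ * selected by frag_output.
+ */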
+
+ *out_vs = vs_b.shader;
+ *out_fs = fs_b.shader;
+}
+
+static VkResult
+create_pipeline(struct anv_device *device,
+ uint32_t samples,
+ struct nir_shader *vs_nir,
+ struct nir_shader *fs_nir,
+ const VkPipelineVertexInputStateCreateInfo *vi_state,
+ const VkPipelineDepthStencilStateCreateInfo *ds_state,
+ const VkPipelineColorBlendStateCreateInfo *cb_state,
+ const VkAllocationCallbacks *alloc,
+ bool use_repclear,
+ struct anv_pipeline **pipeline)
+{
+ VkDevice device_h = anv_device_to_handle(device);
+ VkResult result;
+
+ struct anv_shader_module vs_m = { .nir = vs_nir };
+ struct anv_shader_module fs_m = { .nir = fs_nir };
+
+ VkPipeline pipeline_h = VK_NULL_HANDLE;
+ result = anv_graphics_pipeline_create(device_h,
+ VK_NULL_HANDLE,
+ &(VkGraphicsPipelineCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = fs_nir ? 2 : 1,
+ .pStages = (VkPipelineShaderStageCreateInfo[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = anv_shader_module_to_handle(&vs_m),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = anv_shader_module_to_handle(&fs_m),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = vi_state,
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .pViewports = NULL, /* dynamic */
+ .scissorCount = 1,
+ .pScissors = NULL, /* dynamic */
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = false,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { ~0 },
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pDepthStencilState = ds_state,
+ .pColorBlendState = cb_state,
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ /* The meta clear pipeline declares all state as dynamic.
+ * As a consequence, vkCmdBindPipeline writes no dynamic state
+ * to the cmd buffer. Therefore, at the end of the meta clear,
+ * we need only restore dynamic state that was set with vkCmdSet*.
+ */
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass),
+ .subpass = 0,
+ },
+ &(struct anv_graphics_pipeline_create_info) {
+ .color_attachment_count = MAX_RTS,
+ .use_repclear = use_repclear,
+ .disable_viewport = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ },
+ alloc,
+ &pipeline_h);
+
+ ralloc_free(vs_nir);
+ ralloc_free(fs_nir);
+
+ *pipeline = anv_pipeline_from_handle(pipeline_h);
+
+ return result;
+}
+
+static VkResult
+create_color_pipeline(struct anv_device *device,
+ uint32_t samples,
+ uint32_t frag_output,
+ struct anv_pipeline **pipeline)
+{
+ struct nir_shader *vs_nir;
+ struct nir_shader *fs_nir;
+ build_color_shaders(&vs_nir, &fs_nir, frag_output);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct color_clear_vattrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct color_clear_vattrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct color_clear_vattrs, position),
+ },
+ {
+ /* Color */
+ .location = 2,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_SFLOAT,
+ .offset = offsetof(struct color_clear_vattrs, color),
+ },
+ },
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = false,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ {
+ .blendEnable = false,
+ .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
+ VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT,
+ },
+ },
+ };
+
+ /* Disable repclear because we do not want the compiler to replace the
+ * shader. We need the shader to write to the specified color attachment,
+ * but the repclear shader writes to all color attachments.
+ */
+ return
+ create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state,
+ &cb_state, &device->meta_state.alloc,
+ /*use_repclear*/ false, pipeline);
+}
+
+static void
+destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline)
+{
+ if (!pipeline)
+ return;
+
+ ANV_CALL(DestroyPipeline)(anv_device_to_handle(device),
+ anv_pipeline_to_handle(pipeline),
+ &device->meta_state.alloc);
+}
+
+void
+anv_device_finish_meta_clear_state(struct anv_device *device)
+{
+ struct anv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
+ destroy_pipeline(device, state->clear[i].color_pipelines[j]);
+ }
+
+ destroy_pipeline(device, state->clear[i].depth_only_pipeline);
+ destroy_pipeline(device, state->clear[i].stencil_only_pipeline);
+ destroy_pipeline(device, state->clear[i].depthstencil_pipeline);
+ }
+}
+
+static void
+emit_color_clear(struct anv_cmd_buffer *cmd_buffer,
+ const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect)
+{
+ struct anv_device *device = cmd_buffer->device;
+ const struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const uint32_t subpass_att = clear_att->colorAttachment;
+ const uint32_t pass_att = subpass->color_attachments[subpass_att];
+ const struct anv_image_view *iview = fb->attachments[pass_att];
+ const uint32_t samples = iview->image->samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ struct anv_pipeline *pipeline =
+ device->meta_state.clear[samples_log2].color_pipelines[subpass_att];
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline);
+
+ assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
+ assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(clear_att->colorAttachment < subpass->color_count);
+
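+ /* 3DPRIM_RECTLIST consumes three vertices per rectangle and the
+ * hardware derives the fourth corner, so a single clear rect needs
+ * exactly the three vertices emitted below.
+ */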
+ const struct color_clear_vattrs vertex_data[3] = {
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y,
+ },
+ .color = clear_value,
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y,
+ },
+ .color = clear_value,
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y + clear_rect->rect.extent.height,
+ },
+ .color = clear_value,
+ },
+ };
+
+ struct anv_state state =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = state.offset,
+ };
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+ .minDepth = 0.0,
+ .maxDepth = 1.0,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = { fb->width, fb->height },
+ }
+ });
+
+ ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1,
+ (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) },
+ (VkDeviceSize[]) { 0 });
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_h);
+ }
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+}
+
+
+static void
+build_depthstencil_shader(struct nir_shader **out_vs)
+{
+ nir_builder vs_b;
+
+ nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
+
+ vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
+
+ const struct glsl_type *position_type = glsl_vec4_type();
+
+ nir_variable *vs_in_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
+ "a_position");
+ vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
+
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
+ "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
+
+ nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
+
+ *out_vs = vs_b.shader;
+}
+
+static VkResult
+create_depthstencil_pipeline(struct anv_device *device,
+ VkImageAspectFlags aspects,
+ uint32_t samples,
+ struct anv_pipeline **pipeline)
+{
+ struct nir_shader *vs_nir;
+
+ build_depthstencil_shader(&vs_nir);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct depthstencil_clear_vattrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 2,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct depthstencil_clear_vattrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct depthstencil_clear_vattrs, position),
+ },
+ },
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
+ .front = {
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .writeMask = UINT32_MAX,
+ .reference = 0, /* dynamic */
+ },
+ .back = { 0 /* don't care */ },
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ };
+
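+ /* Unlike the color clear pipeline above, there are no color attachments
+ * here, so using the replicated-data clear (repclear) shader is safe.
+ */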
+ return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state,
+ &cb_state, &device->meta_state.alloc,
+ /*use_repclear*/ true, pipeline);
+}
+
+static void
+emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer,
+ const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect)
+{
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_meta_state *meta_state = &device->meta_state;
+ const struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const uint32_t pass_att = subpass->depth_stencil_attachment;
+ const struct anv_image_view *iview = fb->attachments[pass_att];
+ const uint32_t samples = iview->image->samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+
+ assert(samples_log2 < ARRAY_SIZE(meta_state->clear));
+ assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT ||
+ aspects == VK_IMAGE_ASPECT_STENCIL_BIT ||
+ aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT));
+ assert(pass_att != VK_ATTACHMENT_UNUSED);
+
+ const struct depthstencil_clear_vattrs vertex_data[3] = {
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x,
+ clear_rect->rect.offset.y,
+ },
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y,
+ },
+ },
+ {
+ .vue_header = { 0 },
+ .position = {
+ clear_rect->rect.offset.x + clear_rect->rect.extent.width,
+ clear_rect->rect.offset.y + clear_rect->rect.extent.height,
+ },
+ },
+ };
+
+ struct anv_state state =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = state.offset,
+ };
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+
+ /* Ignored when clearing only stencil. */
+ .minDepth = clear_value.depth,
+ .maxDepth = clear_value.depth,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = { fb->width, fb->height },
+ }
+ });
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
+ clear_value.stencil);
+ }
+
+ ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1,
+ (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) },
+ (VkDeviceSize[]) { 0 });
+
+ struct anv_pipeline *pipeline;
+ switch (aspects) {
+ case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = meta_state->clear[samples_log2].depthstencil_pipeline;
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ pipeline = meta_state->clear[samples_log2].depth_only_pipeline;
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = meta_state->clear[samples_log2].stencil_only_pipeline;
+ break;
+ default:
+ unreachable("expected depth or stencil aspect");
+ }
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ anv_pipeline_to_handle(pipeline));
+ }
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+}
+
+VkResult
+anv_device_init_meta_clear_state(struct anv_device *device)
+{
+ VkResult res;
+ struct anv_meta_state *state = &device->meta_state;
+
+ zero(device->meta_state.clear);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ uint32_t samples = 1 << i;
+
+ for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
+ res = create_color_pipeline(device, samples, /* frag_output */ j,
+ &state->clear[i].color_pipelines[j]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depthstencil_pipeline(device,
+ VK_IMAGE_ASPECT_DEPTH_BIT, samples,
+ &state->clear[i].depth_only_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device,
+ VK_IMAGE_ASPECT_STENCIL_BIT, samples,
+ &state->clear[i].stencil_only_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device,
+ VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT, samples,
+ &state->clear[i].depthstencil_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ return VK_SUCCESS;
+
+fail:
+ anv_device_finish_meta_clear_state(device);
+ return res;
+}
+
+/**
+ * The parameters mean the same as those of vkCmdClearAttachments.
+ */
+static void
+emit_clear(struct anv_cmd_buffer *cmd_buffer,
+ const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect)
+{
+ if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ emit_color_clear(cmd_buffer, clear_att, clear_rect);
+ } else {
+ assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT));
+ emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect);
+ }
+}
+
+static bool
+subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
+{
+ const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ uint32_t a = cmd_state->subpass->color_attachments[i];
+ if (cmd_state->attachments[a].pending_clear_aspects) {
+ return true;
+ }
+ }
+
+ if (ds != VK_ATTACHMENT_UNUSED &&
+ cmd_state->attachments[ds].pending_clear_aspects) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Emit any pending attachment clears for the current subpass.
+ *
+ * @see anv_attachment_state::pending_clear_aspects
+ */
+void
+anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ struct anv_meta_saved_state saved_state;
+
+ if (!subpass_needs_clear(cmd_buffer))
+ return;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ if (cmd_state->framebuffer->layers > 1)
+ anv_finishme("clearing multi-layer framebuffer");
+
+ VkClearRect clear_rect = {
+ .rect = {
+ .offset = { 0, 0 },
+ .extent = { fb->width, fb->height },
+ },
+ .baseArrayLayer = 0,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ uint32_t a = cmd_state->subpass->color_attachments[i];
+
+ if (!cmd_state->attachments[a].pending_clear_aspects)
+ continue;
+
+ assert(cmd_state->attachments[a].pending_clear_aspects ==
+ VK_IMAGE_ASPECT_COLOR_BIT);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = i, /* Use attachment index relative to subpass */
+ .clearValue = cmd_state->attachments[a].clear_value,
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+ cmd_state->attachments[a].pending_clear_aspects = 0;
+ }
+
+ uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
+
+ if (ds != VK_ATTACHMENT_UNUSED &&
+ cmd_state->attachments[ds].pending_clear_aspects) {
+
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds].clear_value,
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+ cmd_state->attachments[ds].pending_clear_aspects = 0;
+ }
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+static void
+anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image *image,
+ VkImageLayout image_layout,
+ const VkClearValue *clear_value,
+ uint32_t range_count,
+ const VkImageSubresourceRange *ranges)
+{
+ VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
+
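+ /* Clear one (mip level, array layer) at a time: wrap each subresource in
+ * a temporary single-attachment view, framebuffer and render pass (with
+ * LOAD_OP_LOAD so the pass itself performs no implicit clear), then reuse
+ * the vkCmdClearAttachments path via emit_clear().
+ */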
+ for (uint32_t r = 0; r < range_count; r++) {
+ const VkImageSubresourceRange *range = &ranges[r];
+
+ for (uint32_t l = 0; l < range->levelCount; ++l) {
+ for (uint32_t s = 0; s < range->layerCount; ++s) {
+ struct anv_image_view iview;
+ anv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = anv_image_to_handle(image),
+ .viewType = anv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange = {
+ .aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + s,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0);
+
+ VkFramebuffer fb;
+ anv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(&iview),
+ },
+ .width = iview.extent.width,
+ .height = iview.extent.height,
+ .layers = 1
+ },
+ &cmd_buffer->pool->alloc,
+ &fb);
+
+ VkAttachmentDescription att_desc = {
+ .format = iview.vk_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = image_layout,
+ .finalLayout = image_layout,
+ };
+
+ VkSubpassDescription subpass_desc = {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = NULL,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ };
+
+ const VkAttachmentReference att_ref = {
+ .attachment = 0,
+ .layout = image_layout,
+ };
+
+ if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ subpass_desc.colorAttachmentCount = 1;
+ subpass_desc.pColorAttachments = &att_ref;
+ } else {
+ subpass_desc.pDepthStencilAttachment = &att_ref;
+ }
+
+ VkRenderPass pass;
+ anv_CreateRenderPass(device_h,
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &att_desc,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_desc,
+ },
+ &cmd_buffer->pool->alloc,
+ &pass);
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderArea = {
+ .offset = { 0, 0, },
+ .extent = {
+ .width = iview.extent.width,
+ .height = iview.extent.height,
+ },
+ },
+ .renderPass = pass,
+ .framebuffer = fb,
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = *clear_value,
+ };
+
+ VkClearRect clear_rect = {
+ .rect = {
+ .offset = { 0, 0 },
+ .extent = { iview.extent.width, iview.extent.height },
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+ ANV_CALL(DestroyRenderPass)(device_h, pass,
+ &cmd_buffer->pool->alloc);
+ ANV_CALL(DestroyFramebuffer)(device_h, fb,
+ &cmd_buffer->pool->alloc);
+ }
+ }
+ }
+}
+
+void anv_CmdClearColorImage(
+ VkCommandBuffer commandBuffer,
+ VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearColorValue* pColor,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange* pRanges)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, image, image_h);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ anv_cmd_clear_image(cmd_buffer, image, imageLayout,
+ (const VkClearValue *) pColor,
+ rangeCount, pRanges);
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+void anv_CmdClearDepthStencilImage(
+ VkCommandBuffer commandBuffer,
+ VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue* pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange* pRanges)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, image, image_h);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ anv_cmd_clear_image(cmd_buffer, image, imageLayout,
+ (const VkClearValue *) pDepthStencil,
+ rangeCount, pRanges);
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+void anv_CmdClearAttachments(
+ VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkClearAttachment* pAttachments,
+ uint32_t rectCount,
+ const VkClearRect* pRects)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ /* FINISHME: We can do better than this dumb loop. It thrashes too much
+ * state.
+ */
+ for (uint32_t a = 0; a < attachmentCount; ++a) {
+ for (uint32_t r = 0; r < rectCount; ++r) {
+ emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]);
+ }
+ }
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
+
+static void
+do_buffer_fill(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_bo *dest, uint64_t dest_offset,
+ int width, int height, VkFormat fill_format, uint32_t data)
+{
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = fill_format,
+ .extent = {
+ .width = width,
+ .height = height,
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = VK_IMAGE_TILING_LINEAR,
+ .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .flags = 0,
+ };
+
+ VkImage dest_image;
+ image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ anv_CreateImage(vk_device, &image_info,
+ &cmd_buffer->pool->alloc, &dest_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ anv_image_from_handle(dest_image)->bo = dest;
+ anv_image_from_handle(dest_image)->offset = dest_offset;
+
+ const VkClearValue clear_value = {
+ .color = {
+ .uint32 = { data, data, data, data }
+ }
+ };
+
+ const VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ };
+
+ anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image),
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ &clear_value, 1, &range);
+}
+
+void anv_CmdFillBuffer(
+ VkCommandBuffer commandBuffer,
+ VkBuffer dstBuffer,
+ VkDeviceSize dstOffset,
+ VkDeviceSize fillSize,
+ uint32_t data)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
+ struct anv_meta_saved_state saved_state;
+
+ meta_clear_begin(&saved_state, cmd_buffer);
+
+ VkFormat format;
+ int bs;
+ if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) {
+ format = VK_FORMAT_R32G32B32A32_UINT;
+ bs = 16;
+ } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) {
+ format = VK_FORMAT_R32G32_UINT;
+ bs = 8;
+ } else {
+ assert((fillSize & 3) == 0 && (dstOffset & 3) == 0);
+ format = VK_FORMAT_R32_UINT;
+ bs = 4;
+ }
+
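+ /* The fill is decomposed into color clears of linear "images" aliased
+ * over the buffer: as many max-sized square rectangles as fit, then one
+ * max-width rectangle covering the remaining full rows, and finally a
+ * single-row rectangle for the tail. For example, a 1 MiB fill at
+ * 16-byte alignment becomes a single 16384x4 R32G32B32A32_UINT clear.
+ */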
+ /* This is the maximum width/height our HW can handle. */
+ const uint64_t max_surface_dim = 1 << 14;
+
+ /* First, emit as many max-sized fills as fit */
+ const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs;
+ while (fillSize > max_fill_size) {
+ do_buffer_fill(cmd_buffer, dst_buffer->bo,
+ dst_buffer->offset + dstOffset,
+ max_surface_dim, max_surface_dim, format, data);
+ fillSize -= max_fill_size;
+ dstOffset += max_fill_size;
+ }
+
+ uint64_t height = fillSize / (max_surface_dim * bs);
+ assert(height < max_surface_dim);
+ if (height != 0) {
+ const uint64_t rect_fill_size = height * max_surface_dim * bs;
+ do_buffer_fill(cmd_buffer, dst_buffer->bo,
+ dst_buffer->offset + dstOffset,
+ max_surface_dim, height, format, data);
+ fillSize -= rect_fill_size;
+ dstOffset += rect_fill_size;
+ }
+
+ if (fillSize != 0) {
+ do_buffer_fill(cmd_buffer, dst_buffer->bo,
+ dst_buffer->offset + dstOffset,
+ fillSize / bs, 1, format, data);
+ }
+
+ meta_clear_end(&saved_state, cmd_buffer);
+}
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "anv_meta.h"
+#include "anv_private.h"
++#include "nir/nir_builder.h"
+
+/**
+ * Vertex attributes used by all pipelines.
+ */
+struct vertex_attrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+ float tex_position[2];
+};
+
+static void
+meta_resolve_save(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT) |
+ (1 << VK_DYNAMIC_STATE_SCISSOR));
+
+ cmd_buffer->state.dynamic.viewport.count = 0;
+ cmd_buffer->state.dynamic.scissor.count = 0;
+}
+
+static void
+meta_resolve_restore(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static VkPipeline *
+get_pipeline_h(struct anv_device *device, uint32_t samples)
+{
+ uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
+
+ assert(samples >= 2);
+ assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));
+
+ return &device->meta_state.resolve.pipelines[i];
+}
+
+static nir_shader *
+build_nir_vs(void)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+
+ nir_builder b;
+ nir_variable *a_position;
+ nir_variable *v_position;
+ nir_variable *a_tex_position;
+ nir_variable *v_tex_position;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
+
+ a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_position");
+ a_position->data.location = VERT_ATTRIB_GENERIC0;
+
+ v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "gl_Position");
+ v_position->data.location = VARYING_SLOT_POS;
+
+ a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_tex_position");
+ a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
+ nir_copy_var(&b, v_position, a_position);
+ nir_copy_var(&b, v_tex_position, a_tex_position);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_fs(uint32_t num_samples)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+
+ const struct glsl_type *sampler2DMS =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+ /*is_shadow*/ false,
+ /*is_array*/ false,
+ GLSL_TYPE_FLOAT);
+
+ nir_builder b;
+ nir_variable *u_tex; /* uniform sampler */
+ nir_variable *v_position; /* vec4, varying fragment position */
+ nir_variable *v_tex_position; /* vec4, varying texture coordinate */
+ nir_variable *f_color; /* vec4, fragment output color */
+ nir_ssa_def *accum; /* vec4, accumulation of sample values */
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_asprintf(b.shader,
+ "meta_resolve_fs_samples%02d",
+ num_samples);
+
+ u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
+ "u_tex");
+ u_tex->data.descriptor_set = 0;
+ u_tex->data.binding = 0;
+
+ v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_position");
+ v_position->data.location = VARYING_SLOT_POS;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
+ f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "f_color");
+ f_color->data.location = FRAG_RESULT_DATA0;
+
+ accum = nir_imm_vec4(&b, 0, 0, 0, 0);
+
+ nir_ssa_def *tex_position_ivec =
+ nir_f2i(&b, nir_load_var(&b, v_tex_position));
+
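+ /* The loop below is roughly equivalent to the GLSL
+ * f_color = (sum_i texelFetch(u_tex, ivec2(v_tex_position), i)) / num_samples,
+ * i.e. a simple box average over all samples of the multisampled source.
+ */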
+ for (uint32_t i = 0; i < num_samples; ++i) {
+ nir_tex_instr *tex;
+
+ tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
+ tex->sampler = nir_deref_var_create(tex, u_tex);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->dest_type = nir_type_float;
+ tex->is_array = false;
+ tex->coord_components = 3;
+ nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ accum = nir_fadd(&b, accum, &tex->dest.ssa);
+ }
+
+ accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
+ nir_store_var(&b, f_color, accum, /*writemask*/ 4);
+
+ return b.shader;
+}
+
+static VkResult
+create_pass(struct anv_device *device)
+{
+ VkResult result;
+ VkDevice device_h = anv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ result = anv_CreateRenderPass(device_h,
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .samples = 1,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 0,
+ },
+ alloc,
+ &device->meta_state.resolve.pass);
+
+ return result;
+}
+
+static VkResult
+create_pipeline(struct anv_device *device,
+ uint32_t num_samples,
+ VkShaderModule vs_module_h)
+{
+ VkResult result;
+ VkDevice device_h = anv_device_to_handle(device);
+
+ struct anv_shader_module fs_module = {
+ .nir = build_nir_fs(num_samples),
+ };
+
+ if (!fs_module.nir) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ result = anv_graphics_pipeline_create(device_h,
+ VK_NULL_HANDLE,
+ &(VkGraphicsPipelineCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = (VkPipelineShaderStageCreateInfo[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = anv_shader_module_to_handle(&fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct vertex_attrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct vertex_attrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, position),
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, tex_position),
+ },
+ },
+ },
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { 0x1 },
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
+ },
+ },
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = device->meta_state.resolve.pipeline_layout,
+ .renderPass = device->meta_state.resolve.pass,
+ .subpass = 0,
+ },
+ &(struct anv_graphics_pipeline_create_info) {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ },
+ &device->meta_state.alloc,
+ get_pipeline_h(device, num_samples));
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
+
+cleanup:
+ ralloc_free(fs_module.nir);
+ return result;
+}
+
+void
+anv_device_finish_meta_resolve_state(struct anv_device *device)
+{
+ struct anv_meta_state *state = &device->meta_state;
+ VkDevice device_h = anv_device_to_handle(device);
+ VkRenderPass pass_h = device->meta_state.resolve.pass;
+ VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
+ VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ if (pass_h)
+ ANV_CALL(DestroyRenderPass)(device_h, pass_h,
+ &device->meta_state.alloc);
+
+ if (pipeline_layout_h)
+ ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);
+
+ if (ds_layout_h)
+ ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
+ VkPipeline pipeline_h = state->resolve.pipelines[i];
+
+ if (pipeline_h) {
+ ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
+ }
+ }
+}
+
+VkResult
+anv_device_init_meta_resolve_state(struct anv_device *device)
+{
+ VkResult res = VK_SUCCESS;
+ VkDevice device_h = anv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ const isl_sample_count_mask_t sample_count_mask =
+ isl_device_get_sample_counts(&device->isl_dev);
+
+ zero(device->meta_state.resolve);
+
+ struct anv_shader_module vs_module = { .nir = build_nir_vs() };
+ if (!vs_module.nir) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);
+
+ res = anv_CreateDescriptorSetLayout(device_h,
+ &(VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ },
+ },
+ },
+ alloc,
+ &device->meta_state.resolve.ds_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = anv_CreatePipelineLayout(device_h,
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = (VkDescriptorSetLayout[]) {
+ device->meta_state.resolve.ds_layout,
+ },
+ },
+ alloc,
+ &device->meta_state.resolve.pipeline_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pass(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
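+ /* Create one resolve pipeline per supported MSAA sample count. Array
+ * index i corresponds to 2^(i+1) samples (2, 4, 8, ...), matching the
+ * ffs(samples) - 2 lookup in get_pipeline_h(); counts not reported by
+ * isl_device_get_sample_counts() are skipped.
+ */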
+ for (uint32_t i = 0;
+ i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {
+
+ uint32_t sample_count = 1 << (1 + i);
+ if (!(sample_count_mask & sample_count))
+ continue;
+
+ res = create_pipeline(device, sample_count, vs_module_h);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ goto cleanup;
+
+fail:
+ anv_device_finish_meta_resolve_state(device);
+
+cleanup:
+ ralloc_free(vs_module.nir);
+
+ return res;
+}
+
+static void
+emit_resolve(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image_view *src_iview,
+ const VkOffset2D *src_offset,
+ struct anv_image_view *dest_iview,
+ const VkOffset2D *dest_offset,
+ const VkExtent2D *resolve_extent)
+{
+ struct anv_device *device = cmd_buffer->device;
+ VkDevice device_h = anv_device_to_handle(device);
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct anv_image *src_image = src_iview->image;
+ VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1;
+
+ const struct vertex_attrs vertex_data[3] = {
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x + resolve_extent->width,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x + resolve_extent->width,
+ src_offset->y + resolve_extent->height,
+ },
+ },
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y + resolve_extent->height,
+ },
+ },
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x,
+ dest_offset->y,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y,
+ },
+ },
+ };
+
+ struct anv_state vertex_mem =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
+ sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
+ .offset = vertex_mem.offset,
+ };
+
+ VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);
+
+ anv_CmdBindVertexBuffers(cmd_buffer_h,
+ /*firstBinding*/ 0,
+ /*bindingCount*/ 1,
+ (VkBuffer[]) { vertex_buffer_h },
+ (VkDeviceSize[]) { 0 });
+
+ VkSampler sampler_h;
+ ANV_CALL(CreateSampler)(device_h,
+ &(VkSamplerCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = VK_FILTER_NEAREST,
+ .minFilter = VK_FILTER_NEAREST,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .mipLodBias = 0.0,
+ .anisotropyEnable = false,
+ .compareEnable = false,
+ .minLod = 0.0,
+ .maxLod = 0.0,
+ .unnormalizedCoordinates = false,
+ },
+ &cmd_buffer->pool->alloc,
+ &sampler_h);
+
+ VkDescriptorSet desc_set_h;
+ anv_AllocateDescriptorSets(device_h,
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = dummy_desc_pool_h,
+ .descriptorSetCount = 1,
+ .pSetLayouts = (VkDescriptorSetLayout[]) {
+ device->meta_state.blit.ds_layout,
+ },
+ },
+ &desc_set_h);
+
+ ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h);
+
+ anv_UpdateDescriptorSets(device_h,
+ /*writeCount*/ 1,
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = desc_set_h,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler_h,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ },
+ },
+ },
+ /*copyCount*/ 0,
+ /*copies */ NULL);
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h,
+ /*firstViewport*/ 0,
+ /*viewportCount*/ 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+ .minDepth = 0.0,
+ .maxDepth = 1.0,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h,
+ /*firstScissor*/ 0,
+ /*scissorCount*/ 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = (VkExtent2D) { fb->width, fb->height },
+ },
+ });
+
+ VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_h);
+ }
+
+ anv_CmdBindDescriptorSets(cmd_buffer_h,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.resolve.pipeline_layout,
+ /*firstSet*/ 0,
+ /* setCount */ 1,
+ (VkDescriptorSet[]) {
+ desc_set_h,
+ },
+ /*copyCount*/ 0,
+ /*copies */ NULL);
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+
+ /* The objects destroyed below have already been consumed by the draw
+ * call, so they can safely be freed now.
+ */
+ anv_descriptor_set_destroy(device, desc_set);
+ anv_DestroySampler(device_h, sampler_h,
+ &cmd_buffer->pool->alloc);
+}
+
+void anv_CmdResolveImage(
+ VkCommandBuffer cmd_buffer_h,
+ VkImage src_image_h,
+ VkImageLayout src_image_layout,
+ VkImage dest_image_h,
+ VkImageLayout dest_image_layout,
+ uint32_t region_count,
+ const VkImageResolve* regions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
+ ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
+ ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_meta_saved_state state;
+ VkDevice device_h = anv_device_to_handle(device);
+
+ meta_resolve_save(&state, cmd_buffer);
+
+ assert(src_image->samples > 1);
+ assert(dest_image->samples == 1);
+
+ if (src_image->samples >= 16) {
+ /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
+ * glBlitFramebuffer workaround for samples >= 16.
+ */
+ anv_finishme("vkCmdResolveImage: need interpolation workaround when "
+ "samples >= 16");
+ }
+
+ if (src_image->array_size > 1)
+ anv_finishme("vkCmdResolveImage: multisample array images");
+
+ for (uint32_t r = 0; r < region_count; ++r) {
+ const VkImageResolve *region = &regions[r];
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * - The aspectMask member of srcSubresource and dstSubresource must
+ * only contain VK_IMAGE_ASPECT_COLOR_BIT
+ *
+ * - The layerCount member of srcSubresource and dstSubresource must
+ * match
+ */
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount ==
+ region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ anv_meta_get_iview_layer(src_image, &region->srcSubresource,
+ &region->srcOffset);
+
+ const uint32_t dest_base_layer =
+ anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
+ &region->dstOffset);
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
+ ++layer) {
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = src_image_h,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_image->format->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = dest_image_h,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dest_image->format->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0);
+
+ VkFramebuffer fb_h;
+ anv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(&dest_iview),
+ },
+ .width = anv_minify(dest_image->extent.width,
+ region->dstSubresource.mipLevel),
+ .height = anv_minify(dest_image->extent.height,
+ region->dstSubresource.mipLevel),
+ .layers = 1
+ },
+ &cmd_buffer->pool->alloc,
+ &fb_h);
+
+ ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.resolve.pass,
+ .framebuffer = fb_h,
+ .renderArea = {
+ .offset = {
+ region->dstOffset.x,
+ region->dstOffset.y,
+ },
+ .extent = {
+ region->extent.width,
+ region->extent.height,
+ }
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ emit_resolve(cmd_buffer,
+ &src_iview,
+ &(VkOffset2D) {
+ .x = region->srcOffset.x,
+ .y = region->srcOffset.y,
+ },
+ &dest_iview,
+ &(VkOffset2D) {
+ .x = region->dstOffset.x,
+ .y = region->dstOffset.y,
+ },
+ &(VkExtent2D) {
+ .width = region->extent.width,
+ .height = region->extent.height,
+ });
+
+ ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);
+
+ anv_DestroyFramebuffer(device_h, fb_h,
+ &cmd_buffer->pool->alloc);
+ }
+ }
+
+ meta_resolve_restore(&state, cmd_buffer);
+}
+
+/**
+ * Emit any needed resolves for the current subpass.
+ */
+void
+anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ struct anv_meta_saved_state saved_state;
+
+ /* FINISHME(perf): Skip clears for resolve attachments.
+ *
+ * From the Vulkan 1.0 spec:
+ *
+ * If the first use of an attachment in a render pass is as a resolve
+ * attachment, then the loadOp is effectively ignored as the resolve is
+ * guaranteed to overwrite all pixels in the render area.
+ */
+
+ if (!subpass->has_resolve)
+ return;
+
+ meta_resolve_save(&saved_state, cmd_buffer);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ uint32_t src_att = subpass->color_attachments[i];
+ uint32_t dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct anv_image_view *src_iview = fb->attachments[src_att];
+ struct anv_image_view *dest_iview = fb->attachments[dest_att];
+
+ struct anv_subpass resolve_subpass = {
+ .color_count = 1,
+ .color_attachments = (uint32_t[]) { dest_att },
+ .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
+ };
+
+ anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ /* Subpass resolves must respect the render area. We can ignore the
+ * render area here because vkCmdBeginRenderPass set the render area
+ * with 3DSTATE_DRAWING_RECTANGLE.
+ *
+ * XXX(chadv): Does the hardware really respect
+ * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
+ */
+ emit_resolve(cmd_buffer,
+ src_iview,
+ &(VkOffset2D) { 0, 0 },
+ dest_iview,
+ &(VkOffset2D) { 0, 0 },
+ &(VkExtent2D) { fb->width, fb->height });
+ }
+
+ cmd_buffer->state.subpass = subpass;
+ meta_resolve_restore(&saved_state, cmd_buffer);
+}
--- /dev/null
- #include "glsl/nir/nir.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
++#include "nir/nir.h"
+#include "anv_private.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar);
+
+void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data);
+bool anv_nir_apply_pipeline_layout(nir_shader *shader,
+ struct brw_stage_prog_data *prog_data,
+ const struct anv_pipeline_layout *layout);
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
++#include "nir/nir_builder.h"
+
+struct apply_dynamic_offsets_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ struct anv_pipeline_layout *layout;
+
+ uint32_t indices_start;
+};
+
+static bool
+apply_dynamic_offsets_block(nir_block *block, void *void_state)
+{
+ struct apply_dynamic_offsets_state *state = void_state;
+ struct anv_descriptor_set_layout *set_layout;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ unsigned block_idx_src;
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ssbo:
+ block_idx_src = 0;
+ break;
+ case nir_intrinsic_store_ssbo:
+ block_idx_src = 1;
+ break;
+ default:
+ continue; /* the loop */
+ }
+
+ nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
+ assert(res_instr->type == nir_instr_type_intrinsic);
+ nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
+ assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+ unsigned set = res_intrin->const_index[0];
+ unsigned binding = res_intrin->const_index[1];
+
+ set_layout = state->layout->set[set].layout;
+ if (set_layout->binding[binding].dynamic_offset_index < 0)
+ continue;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ /* First, we need to generate the uniform load for the buffer offset */
+ uint32_t index = state->layout->set[set].dynamic_offset_start +
+ set_layout->binding[binding].dynamic_offset_index;
+
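+ /* Each dynamic buffer occupies 8 bytes of uniform space: a 32-bit offset
+ * followed by a 32-bit range, so entry N lives at indices_start + N * 8
+ * and the load below fetches both components at once.
+ */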
+ nir_intrinsic_instr *offset_load =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
+ offset_load->num_components = 2;
+ offset_load->const_index[0] = state->indices_start + index * 8;
+ offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
+ nir_imm_int(b, 8)));
+
+ nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
+ nir_builder_instr_insert(b, &offset_load->instr);
+
+ nir_src *offset_src = nir_get_io_offset_src(intrin);
+ nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
+ &offset_load->dest.ssa);
+
+ /* In order to avoid out-of-bounds access, we predicate */
+ nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
+ offset_src->ssa);
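+ /* Channel 1 of the loaded pair is the buffer range: the access only
+ * executes when the incoming offset does not exceed that range, and
+ * out-of-bounds loads produce zero through the phi added below.
+ */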
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(pred);
+ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+
+ nir_instr_remove(&intrin->instr);
+ *offset_src = nir_src_for_ssa(new_offset);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);
+
+ if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
+ /* It's a load, so we need a phi node */
+ nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+ nir_ssa_dest_init(&phi->instr, &phi->dest,
+ intrin->num_components, NULL);
+
+ nir_phi_src *src1 = ralloc(phi, nir_phi_src);
+ struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
+ src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
+ src1->src = nir_src_for_ssa(&intrin->dest.ssa);
+ exec_list_push_tail(&phi->srcs, &src1->node);
+
+ b->cursor = nir_after_cf_list(&if_stmt->else_list);
+ nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
+ (nir_const_value) { .u = { 0, 0, 0, 0 } });
+
+ nir_phi_src *src2 = ralloc(phi, nir_phi_src);
+ struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
+ src2->pred = exec_node_data(nir_block, enode, cf_node.node);
+ src2->src = nir_src_for_ssa(zero);
+ exec_list_push_tail(&phi->srcs, &src2->node);
+
+ assert(intrin->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&phi->dest.ssa));
+
+ nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
+ }
+ }
+
+ return true;
+}
+
+void
+anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct apply_dynamic_offsets_state state = {
+ .shader = shader,
+ .layout = pipeline->layout,
+ .indices_start = shader->num_uniforms,
+ };
+
+ if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
+ return;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_builder_init(&state.builder, function->impl);
+ nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
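+ /* The param entries below are never dereferenced by the compiler; they
+ * only need to be distinct pointers, so taking field addresses off a
+ * NULL anv_push_constants pointer gives us the structure offsets for
+ * free.
+ */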
+ struct anv_push_constants *null_data = NULL;
+ for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
+ prog_data->param[i * 2 + shader->num_uniforms] =
+ (const union gl_constant_value *)&null_data->dynamic[i].offset;
+ prog_data->param[i * 2 + 1 + shader->num_uniforms] =
+ (const union gl_constant_value *)&null_data->dynamic[i].range;
+ }
+
+ shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
+}
--- /dev/null
- #include "glsl/nir/nir_builder.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "program/prog_parameter.h"
++#include "nir/nir_builder.h"
+
+struct apply_pipeline_layout_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ const struct anv_pipeline_layout *layout;
+
+ bool progress;
+};
+
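+ /* Map a (set, binding) pair to a flat binding table index for this stage:
+ * the set's per-stage surface_start plus the binding's surface index
+ * within that set.
+ */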
+static uint32_t
+get_surface_index(unsigned set, unsigned binding,
+ struct apply_pipeline_layout_state *state)
+{
+ assert(set < state->layout->num_sets);
+ struct anv_descriptor_set_layout *set_layout =
+ state->layout->set[set].layout;
+
+ gl_shader_stage stage = state->shader->stage;
+
+ assert(binding < set_layout->binding_count);
+
+ assert(set_layout->binding[binding].stage[stage].surface_index >= 0);
+
+ uint32_t surface_index =
+ state->layout->set[set].stage[stage].surface_start +
+ set_layout->binding[binding].stage[stage].surface_index;
+
+ assert(surface_index < state->layout->stage[stage].surface_count);
+
+ return surface_index;
+}
+
+static uint32_t
+get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op,
+ struct apply_pipeline_layout_state *state)
+{
+ assert(set < state->layout->num_sets);
+ struct anv_descriptor_set_layout *set_layout =
+ state->layout->set[set].layout;
+
+ assert(binding < set_layout->binding_count);
+
+ gl_shader_stage stage = state->shader->stage;
+
+ if (set_layout->binding[binding].stage[stage].sampler_index < 0) {
+ assert(tex_op == nir_texop_txf);
+ return 0;
+ }
+
+ uint32_t sampler_index =
+ state->layout->set[set].stage[stage].sampler_start +
+ set_layout->binding[binding].stage[stage].sampler_index;
+
+ assert(sampler_index < state->layout->stage[stage].sampler_count);
+
+ return sampler_index;
+}
+
+static uint32_t
+get_image_index(unsigned set, unsigned binding,
+ struct apply_pipeline_layout_state *state)
+{
+ assert(set < state->layout->num_sets);
+ struct anv_descriptor_set_layout *set_layout =
+ state->layout->set[set].layout;
+
+ assert(binding < set_layout->binding_count);
+
+ gl_shader_stage stage = state->shader->stage;
+
+ assert(set_layout->binding[binding].stage[stage].image_index >= 0);
+
+ uint32_t image_index =
+ state->layout->set[set].stage[stage].image_start +
+ set_layout->binding[binding].stage[stage].image_index;
+
+ assert(image_index < state->layout->stage[stage].image_count);
+
+ return image_index;
+}
+
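+ /* Lower nir_intrinsic_vulkan_resource_index to the flat surface index the
+ * backend expects. The set and binding come from the const_indices and the
+ * (possibly dynamic) array index in src[0] is added on top.
+ */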
+static void
+lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
+ struct apply_pipeline_layout_state *state)
+{
+ nir_builder *b = &state->builder;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ uint32_t set = intrin->const_index[0];
+ uint32_t binding = intrin->const_index[1];
+
+ uint32_t surface_index = get_surface_index(set, binding, state);
+
+ nir_const_value *const_block_idx =
+ nir_src_as_const_value(intrin->src[0]);
+
+ nir_ssa_def *block_index;
+ if (const_block_idx) {
+ block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]);
+ } else {
+ block_index = nir_iadd(b, nir_imm_int(b, surface_index),
+ nir_ssa_for_src(b, intrin->src[0], 1));
+ }
+
+ assert(intrin->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
+ nir_instr_remove(&intrin->instr);
+}
+
+static void
+lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
+ unsigned *const_index, nir_tex_src_type src_type,
+ struct apply_pipeline_layout_state *state)
+{
+ if (deref->deref.child) {
+ assert(deref->deref.child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+
+ *const_index += deref_array->base_offset;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
+ tex->num_srcs + 1);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ new_srcs[i].src_type = tex->src[i].src_type;
+ nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
+ }
+
+ ralloc_free(tex->src);
+ tex->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
+ tex->src[tex->num_srcs].src_type = src_type;
+ tex->num_srcs++;
+ assert(deref_array->indirect.is_ssa);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
+ deref_array->indirect);
+ }
+ }
+}
+
+static void
+cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
+{
+ if (deref->deref.child == NULL)
+ return;
+
+ nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+
+ if (deref_array->deref_array_type != nir_deref_array_type_indirect)
+ return;
+
+ nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
+}
+
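+ /* Resolve a texture/sampler deref (descriptor set, binding, and optional
+ * array index) into the flat texture_index/sampler_index pair, turning any
+ * indirect array index into a texture/sampler offset source.
+ */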
+static void
+lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+{
+ /* No one should have come by and lowered it already */
+ assert(tex->sampler);
+
+ nir_deref_var *tex_deref = tex->texture ? tex->texture : tex->sampler;
+ tex->texture_index =
+ get_surface_index(tex_deref->var->data.descriptor_set,
+ tex_deref->var->data.binding, state);
+ lower_tex_deref(tex, tex_deref, &tex->texture_index,
+ nir_tex_src_texture_offset, state);
+
+ tex->sampler_index =
+ get_sampler_index(tex->sampler->var->data.descriptor_set,
+ tex->sampler->var->data.binding, tex->op, state);
+ lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
+ nir_tex_src_sampler_offset, state);
+
+ /* The backend only ever uses this to mark used surfaces. We don't care
+ * about that little optimization so it just needs to be non-zero.
+ */
+ tex->texture_array_size = 1;
+
+ if (tex->texture)
+ cleanup_tex_deref(tex, tex->texture);
+ cleanup_tex_deref(tex, tex->sampler);
+ tex->texture = NULL;
+ tex->sampler = NULL;
+}
+
+static bool
+apply_pipeline_layout_block(nir_block *block, void *void_state)
+{
+ struct apply_pipeline_layout_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
+ lower_res_index_intrinsic(intrin, state);
+ state->progress = true;
+ }
+ break;
+ }
+ case nir_instr_type_tex:
+ lower_tex(nir_instr_as_tex(instr), state);
+ /* All texture instructions need lowering */
+ state->progress = true;
+ break;
+ default:
+ continue;
+ }
+ }
+
+ return true;
+}
+
+static void
+setup_vec4_uniform_value(const union gl_constant_value **params,
+ const union gl_constant_value *values,
+ unsigned n)
+{
+ static const gl_constant_value zero = { 0 };
+
+ for (unsigned i = 0; i < n; ++i)
+ params[i] = &values[i];
+
+ for (unsigned i = n; i < 4; ++i)
+ params[i] = &zero;
+}
+
+bool
+anv_nir_apply_pipeline_layout(nir_shader *shader,
+ struct brw_stage_prog_data *prog_data,
+ const struct anv_pipeline_layout *layout)
+{
+ struct apply_pipeline_layout_state state = {
+ .shader = shader,
+ .layout = layout,
+ };
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_builder_init(&state.builder, function->impl);
+ nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
+ if (layout->stage[shader->stage].image_count > 0) {
+ nir_foreach_variable(var, &shader->uniforms) {
+ if (glsl_type_is_image(var->type) ||
+ (glsl_type_is_array(var->type) &&
+ glsl_type_is_image(glsl_get_array_element(var->type)))) {
+ /* Images are represented as uniform push constants and the actual
+ * information required for reading/writing to/from the image is
+ * stored in the uniform.
+ */
+ unsigned image_index = get_image_index(var->data.descriptor_set,
+ var->data.binding, &state);
+
+ var->data.driver_location = shader->num_uniforms +
+ image_index * BRW_IMAGE_PARAM_SIZE * 4;
+ }
+ }
+
+ struct anv_push_constants *null_data = NULL;
+ const gl_constant_value **param = prog_data->param + shader->num_uniforms;
+ const struct brw_image_param *image_param = null_data->images;
+ for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) {
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ (const union gl_constant_value *)&image_param->surface_idx, 1);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ (const union gl_constant_value *)image_param->offset, 2);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ (const union gl_constant_value *)image_param->size, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ (const union gl_constant_value *)image_param->stride, 4);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ (const union gl_constant_value *)image_param->tiling, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ (const union gl_constant_value *)image_param->swizzling, 2);
+
+ param += BRW_IMAGE_PARAM_SIZE;
+ image_param ++;
+ }
+
+ shader->num_uniforms += layout->stage[shader->stage].image_count *
+ BRW_IMAGE_PARAM_SIZE * 4;
+ }
+
+ return state.progress;
+}
--- /dev/null
- #include "glsl/nir/spirv/nir_spirv.h"
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+#include "brw_nir.h"
+#include "anv_nir.h"
++#include "nir/spirv/nir_spirv.h"
+
+/* Needed for SWIZZLE macros */
+#include "program/prog_instruction.h"
+
+// Shader functions
+
+VkResult anv_CreateShaderModule(
+ VkDevice _device,
+ const VkShaderModuleCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkShaderModule* pShaderModule)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_shader_module *module;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
+ assert(pCreateInfo->flags == 0);
+
+ module = anv_alloc2(&device->alloc, pAllocator,
+ sizeof(*module) + pCreateInfo->codeSize, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (module == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ module->nir = NULL;
+ module->size = pCreateInfo->codeSize;
+ memcpy(module->data, pCreateInfo->pCode, module->size);
+
+ *pShaderModule = anv_shader_module_to_handle(module);
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroyShaderModule(
+ VkDevice _device,
+ VkShaderModule _module,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_shader_module, module, _module);
+
+ anv_free2(&device->alloc, pAllocator, module);
+}
+
+#define SPIR_V_MAGIC_NUMBER 0x07230203
+
+/* Eventually, this will become part of anv_CreateShader. Unfortunately,
+ * we can't do that yet because we don't have the ability to copy nir.
+ */
+static nir_shader *
+anv_shader_compile_to_nir(struct anv_device *device,
+ struct anv_shader_module *module,
+ const char *entrypoint_name,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info)
+{
+ if (strcmp(entrypoint_name, "main") != 0) {
+ anv_finishme("Multiple shaders per module not really supported");
+ }
+
+ const struct brw_compiler *compiler =
+ device->instance->physicalDevice.compiler;
+ const nir_shader_compiler_options *nir_options =
+ compiler->glsl_compiler_options[stage].NirOptions;
+
+ nir_shader *nir;
+ nir_function *entry_point;
+ if (module->nir) {
+ /* Some things such as our meta clear/blit code will give us a NIR
+ * shader directly. In that case, we ignore the SPIR-V entirely and
+ * use the NIR shader as-is.
+ */
+ nir = module->nir;
+ nir->options = nir_options;
+ nir_validate_shader(nir);
+
+ assert(exec_list_length(&nir->functions) == 1);
+ struct exec_node *node = exec_list_get_head(&nir->functions);
+ entry_point = exec_node_data(nir_function, node, node);
+ } else {
+ uint32_t *spirv = (uint32_t *) module->data;
+ assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
+ assert(module->size % 4 == 0);
+
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ if (spec_info && spec_info->mapEntryCount > 0) {
+ num_spec_entries = spec_info->mapEntryCount;
+ spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
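+ /* Each VkSpecializationMapEntry names a constant ID and an offset into
+ * pData; only 32-bit specialization values are handled here.
+ */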
+ for (uint32_t i = 0; i < num_spec_entries; i++) {
+ const uint32_t *data =
+ spec_info->pData + spec_info->pMapEntries[i].offset;
+ assert((const void *)(data + 1) <=
+ spec_info->pData + spec_info->dataSize);
+
+ spec_entries[i].id = spec_info->pMapEntries[i].constantID;
+ spec_entries[i].data = *data;
+ }
+ }
+
+ entry_point = spirv_to_nir(spirv, module->size / 4,
+ spec_entries, num_spec_entries,
+ stage, entrypoint_name, nir_options);
+ nir = entry_point->shader;
+ assert(nir->stage == stage);
+ nir_validate_shader(nir);
+
+ free(spec_entries);
+
+ nir_lower_returns(nir);
+ nir_validate_shader(nir);
+
+ nir_inline_functions(nir);
+ nir_validate_shader(nir);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+ if (func != entry_point)
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+ entry_point->name = ralloc_strdup(entry_point, "main");
+
+ nir_remove_dead_variables(nir, nir_var_shader_in);
+ nir_remove_dead_variables(nir, nir_var_shader_out);
+ nir_remove_dead_variables(nir, nir_var_system_value);
+ nir_validate_shader(nir);
+
+ nir_lower_outputs_to_temporaries(entry_point->shader, entry_point);
+
+ nir_lower_system_values(nir);
+ nir_validate_shader(nir);
+ }
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
+ nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]);
+
+ nir_shader_gather_info(nir, entry_point->impl);
+
+ uint32_t indirect_mask = 0;
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
+ indirect_mask |= (1 << nir_var_shader_in);
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
+ indirect_mask |= 1 << nir_var_local;
+
+ nir_lower_indirect_derefs(nir, indirect_mask);
+
+ return nir;
+}
+
+void
+anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
+ struct anv_device *device)
+{
+ cache->device = device;
+ anv_state_stream_init(&cache->program_stream,
+ &device->instruction_block_pool);
+ pthread_mutex_init(&cache->mutex, NULL);
+}
+
+void
+anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
+{
+ anv_state_stream_finish(&cache->program_stream);
+ pthread_mutex_destroy(&cache->mutex);
+}
+
+static uint32_t
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+ const void *data, size_t size)
+{
+ pthread_mutex_lock(&cache->mutex);
+
+ struct anv_state state =
+ anv_state_stream_alloc(&cache->program_stream, size, 64);
+
+ pthread_mutex_unlock(&cache->mutex);
+
+ assert(size < cache->program_stream.block_pool->block_size);
+
+ memcpy(state.map, data, size);
+
+ if (!cache->device->info.has_llc)
+ anv_state_clflush(state);
+
+ return state.offset;
+}
+
+VkResult anv_CreatePipelineCache(
+ VkDevice _device,
+ const VkPipelineCacheCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipelineCache* pPipelineCache)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_pipeline_cache *cache;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
+ assert(pCreateInfo->flags == 0);
+
+ cache = anv_alloc2(&device->alloc, pAllocator,
+ sizeof(*cache), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cache == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ anv_pipeline_cache_init(cache, device);
+
+ *pPipelineCache = anv_pipeline_cache_to_handle(cache);
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroyPipelineCache(
+ VkDevice _device,
+ VkPipelineCache _cache,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+ anv_pipeline_cache_finish(cache);
+
+ anv_free2(&device->alloc, pAllocator, cache);
+}
+
+VkResult anv_GetPipelineCacheData(
+ VkDevice device,
+ VkPipelineCache pipelineCache,
+ size_t* pDataSize,
+ void* pData)
+{
+ *pDataSize = 0;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_MergePipelineCaches(
+ VkDevice device,
+ VkPipelineCache destCache,
+ uint32_t srcCacheCount,
+ const VkPipelineCache* pSrcCaches)
+{
+ stub_return(VK_SUCCESS);
+}
+
+void anv_DestroyPipeline(
+ VkDevice _device,
+ VkPipeline _pipeline,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+
+ anv_reloc_list_finish(&pipeline->batch_relocs,
+ pAllocator ? pAllocator : &device->alloc);
+ if (pipeline->blend_state.map)
+ anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+ anv_free2(&device->alloc, pAllocator, pipeline);
+}
+
+static const uint32_t vk_to_gen_primitive_type[] = {
+ [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+ [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+ [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */
+};
+
+static void
+populate_sampler_prog_key(const struct brw_device_info *devinfo,
+ struct brw_sampler_prog_key_data *key)
+{
+ /* XXX: Handle texture swizzle on HSW- */
+ for (int i = 0; i < MAX_SAMPLERS; i++) {
+ /* Assume color sampler, no swizzling. (Works for BDW+) */
+ key->swizzles[i] = SWIZZLE_XYZW;
+ }
+}
+
+static void
+populate_vs_prog_key(const struct brw_device_info *devinfo,
+ struct brw_vs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+
+ /* XXX: Handle vertex input work-arounds */
+
+ /* XXX: Handle sampler_prog_key */
+}
+
+static void
+populate_gs_prog_key(const struct brw_device_info *devinfo,
+ struct brw_gs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+}
+
+static void
+populate_wm_prog_key(const struct brw_device_info *devinfo,
+ const VkGraphicsPipelineCreateInfo *info,
+ const struct anv_graphics_pipeline_create_info *extra,
+ struct brw_wm_prog_key *key)
+{
+ ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
+
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+
+ /* TODO: Fill out key->input_slots_valid */
+
+ /* Vulkan doesn't specify a default */
+ key->high_quality_derivatives = false;
+
+ /* XXX Vulkan doesn't appear to specify */
+ key->clamp_fragment_color = false;
+
+ /* Vulkan always specifies upper-left coordinates */
+ key->drawable_height = 0;
+ key->render_to_fbo = false;
+
+ if (extra && extra->color_attachment_count >= 0) {
+ key->nr_color_regions = extra->color_attachment_count;
+ } else {
+ key->nr_color_regions =
+ render_pass->subpasses[info->subpass].color_count;
+ }
+
+ key->replicate_alpha = key->nr_color_regions > 1 &&
+ info->pMultisampleState &&
+ info->pMultisampleState->alphaToCoverageEnable;
+
+ if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) {
+ /* We should probably pull this out of the shader, but it's fairly
+ * harmless to compute it and then let dead-code take care of it.
+ */
+ key->persample_shading = info->pMultisampleState->sampleShadingEnable;
+ if (key->persample_shading)
+ key->persample_2x = info->pMultisampleState->rasterizationSamples == 2;
+
+ key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
+ key->compute_sample_id = info->pMultisampleState->sampleShadingEnable;
+ }
+}
+
+static void
+populate_cs_prog_key(const struct brw_device_info *devinfo,
+ struct brw_cs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ populate_sampler_prog_key(devinfo, &key->tex);
+}
+
+static nir_shader *
+anv_pipeline_compile(struct anv_pipeline *pipeline,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ struct brw_stage_prog_data *prog_data)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+
+ nir_shader *nir = anv_shader_compile_to_nir(pipeline->device,
+ module, entrypoint, stage,
+ spec_info);
+ if (nir == NULL)
+ return NULL;
+
+ anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]);
+
+ /* Figure out the number of parameters */
+ prog_data->nr_params = 0;
+
+ if (nir->num_uniforms > 0) {
+ /* If the shader uses any push constants at all, we'll just give
+ * them the maximum possible number
+ */
+ prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
+ }
+
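+ /* Each dynamic buffer needs two params: one for the offset and one for
+ * the range (filled in by anv_nir_apply_dynamic_offsets).
+ */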
+ if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
+ prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;
+
+ if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0)
+ prog_data->nr_params += pipeline->layout->stage[stage].image_count *
+ BRW_IMAGE_PARAM_SIZE;
+
+ if (prog_data->nr_params > 0) {
+ /* XXX: I think we're leaking this */
+ prog_data->param = (const union gl_constant_value **)
+ malloc(prog_data->nr_params * sizeof(union gl_constant_value *));
+
+ /* We now set the param values to be offsets into an
+ * anv_push_constants structure. Since the compiler doesn't
+ * actually dereference any of the gl_constant_value pointers in the
+ * params array, it doesn't really matter what we put here.
+ */
+ struct anv_push_constants *null_data = NULL;
+ if (nir->num_uniforms > 0) {
+ /* Fill out the push constants section of the param array */
+ for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
+ prog_data->param[i] = (const union gl_constant_value *)
+ &null_data->client_data[i * sizeof(float)];
+ }
+ }
+
+ /* Set up dynamic offsets */
+ anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
+
+ /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
+ if (pipeline->layout)
+ anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout);
+
+ /* All binding table offsets provided by apply_pipeline_layout() are
+ * relative to the start of the binding table. The bias accounts for
+ * the slots that come first: the render targets for fragment shaders
+ * (MAX_RTS) and the work-groups surface for compute shaders.
+ */
+ unsigned bias;
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ bias = MAX_RTS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ bias = 1;
+ break;
+ default:
+ bias = 0;
+ break;
+ }
+ prog_data->binding_table.size_bytes = 0;
+ prog_data->binding_table.texture_start = bias;
+ prog_data->binding_table.ubo_start = bias;
+ prog_data->binding_table.ssbo_start = bias;
+ prog_data->binding_table.image_start = bias;
+
+ /* Finish the optimization and compilation process */
+ nir = brw_nir_lower_io(nir, &pipeline->device->info,
+ compiler->scalar_stage[stage]);
+
+ /* nir_lower_io will only handle the push constants; we need to set this
+ * to the full number of possible uniforms.
+ */
+ nir->num_uniforms = prog_data->nr_params * 4;
+
+ return nir;
+}
+
+static void
+anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
+ gl_shader_stage stage,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct brw_device_info *devinfo = &pipeline->device->info;
+ uint32_t max_threads[] = {
+ [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
+ [MESA_SHADER_TESS_CTRL] = 0,
+ [MESA_SHADER_TESS_EVAL] = 0,
+ [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
+ [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
+ [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads,
+ };
+
+ pipeline->prog_data[stage] = prog_data;
+ pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
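+ /* Scratch is carved out of one linear allocation: each stage records its
+ * start offset and the total grows by the per-thread scratch size times
+ * the maximum thread count for that stage.
+ */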
+ pipeline->scratch_start[stage] = pipeline->total_scratch;
+ pipeline->total_scratch =
+ align_u32(pipeline->total_scratch, 1024) +
+ prog_data->total_scratch * max_threads[stage];
+}
+
+static VkResult
+anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *info,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+ struct brw_vs_prog_key key;
+
+ populate_vs_prog_key(&pipeline->device->info, &key);
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_VERTEX, spec_info,
+ &prog_data->base.base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ prog_data->inputs_read = nir->info.inputs_read;
+ if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+
+ brw_compute_vue_map(&pipeline->device->info,
+ &prog_data->base.vue_map,
+ nir->info.outputs_written,
+ nir->info.separate_shader);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ NULL, false, -1, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ const uint32_t offset =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
+ pipeline->vs_simd8 = offset;
+ pipeline->vs_vec4 = NO_KERNEL;
+ } else {
+ pipeline->vs_simd8 = NO_KERNEL;
+ pipeline->vs_vec4 = offset;
+ }
+
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
+ &prog_data->base.base);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *info,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data;
+ struct brw_gs_prog_key key;
+
+ populate_gs_prog_key(&pipeline->device->info, &key);
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_GEOMETRY, spec_info,
+ &prog_data->base.base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+
+ brw_compute_vue_map(&pipeline->device->info,
+ &prog_data->base.vue_map,
+ nir->info.outputs_written,
+ nir->info.separate_shader);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ NULL, -1, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ /* TODO: SIMD8 GS */
+ pipeline->gs_kernel =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ pipeline->gs_vertex_count = nir->info.gs.vertices_in;
+
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
+ &prog_data->base.base);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *info,
+ const struct anv_graphics_pipeline_create_info *extra,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+ struct brw_wm_prog_key key;
+
+ populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
+
+ if (pipeline->use_repclear)
+ key.nr_color_regions = 1;
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ prog_data->binding_table.render_target_start = 0;
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_FRAGMENT, spec_info,
+ &prog_data->base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ uint32_t offset =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ if (prog_data->no_8)
+ pipeline->ps_simd8 = NO_KERNEL;
+ else
+ pipeline->ps_simd8 = offset;
+
+ if (prog_data->no_8 || prog_data->prog_offset_16) {
+ pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
+ } else {
+ pipeline->ps_simd16 = NO_KERNEL;
+ }
+
+ pipeline->ps_ksp2 = 0;
+ pipeline->ps_grf_start2 = 0;
+ if (pipeline->ps_simd8 != NO_KERNEL) {
+ pipeline->ps_ksp0 = pipeline->ps_simd8;
+ pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
+ if (pipeline->ps_simd16 != NO_KERNEL) {
+ pipeline->ps_ksp2 = pipeline->ps_simd16;
+ pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
+ }
+ } else if (pipeline->ps_simd16 != NO_KERNEL) {
+ pipeline->ps_ksp0 = pipeline->ps_simd16;
+ pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
+ }
+
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
+ &prog_data->base);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const VkComputePipelineCreateInfo *info,
+ struct anv_shader_module *module,
+ const char *entrypoint,
+ const VkSpecializationInfo *spec_info)
+{
+ const struct brw_compiler *compiler =
+ pipeline->device->instance->physicalDevice.compiler;
+ struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+ struct brw_cs_prog_key key;
+
+ populate_cs_prog_key(&pipeline->device->info, &key);
+
+ /* TODO: Look up shader in cache */
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ prog_data->binding_table.work_groups_start = 0;
+
+ nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
+ MESA_SHADER_COMPUTE, spec_info,
+ &prog_data->base);
+ if (nir == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ prog_data->base.total_shared = nir->num_shared;
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ if (module->nir == NULL)
+ ralloc_steal(mem_ctx, nir);
+
+ unsigned code_size;
+ const unsigned *shader_code =
+ brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+ -1, &code_size, NULL);
+ if (shader_code == NULL) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ pipeline->cs_simd =
+ anv_pipeline_cache_upload_kernel(cache, shader_code, code_size);
+ ralloc_free(mem_ctx);
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
+ &prog_data->base);
+
+ return VK_SUCCESS;
+}
+
+static const int gen8_push_size = 32 * 1024;
+
+static void
+gen7_compute_urb_partition(struct anv_pipeline *pipeline)
+{
+ const struct brw_device_info *devinfo = &pipeline->device->info;
+ bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
+ unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+ unsigned vs_entry_size_bytes = vs_size * 64;
+ bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
+ unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+ unsigned gs_entry_size_bytes = gs_size * 64;
+
+ /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
+ *
+ * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
+ * Allocation Size is less than 9 512-bit URB entries.
+ *
+ * Similar text exists for GS.
+ */
+ unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
+ unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
+
+ /* URB allocations must be done in 8k chunks. */
+ unsigned chunk_size_bytes = 8192;
+
+ /* Determine the size of the URB in chunks. */
+ unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
+
+ /* Reserve space for push constants */
+ unsigned push_constant_bytes = gen8_push_size;
+ unsigned push_constant_chunks =
+ push_constant_bytes / chunk_size_bytes;
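+ /* With the 32k gen8 push constant size and 8k chunks this reserves
+ * 4 chunks of the URB.
+ */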
+
+ /* Initially, assign each stage the minimum amount of URB space it needs,
+ * and make a note of how much additional space it "wants" (the amount of
+ * additional space it could actually make use of).
+ */
+
+ /* VS has a lower limit on the number of URB entries */
+ unsigned vs_chunks =
+ ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ unsigned vs_wants =
+ ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - vs_chunks;
+
+ unsigned gs_chunks = 0;
+ unsigned gs_wants = 0;
+ if (gs_present) {
+ /* There are two constraints on the minimum amount of URB space we can
+ * allocate:
+ *
+ * (1) We need room for at least 2 URB entries, since we always operate
+ * the GS in DUAL_OBJECT mode.
+ *
+ * (2) We can't allocate less than gs_granularity entries.
+ */
+ gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ gs_wants =
+ ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - gs_chunks;
+ }
+
+ /* There should always be enough URB space to satisfy the minimum
+ * requirements of each stage.
+ */
+ unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
+ assert(total_needs <= urb_chunks);
+
+ /* Mete out remaining space (if any) in proportion to "wants". */
+ unsigned total_wants = vs_wants + gs_wants;
+ unsigned remaining_space = urb_chunks - total_needs;
+ if (remaining_space > total_wants)
+ remaining_space = total_wants;
+ if (remaining_space > 0) {
+ unsigned vs_additional = (unsigned)
+ round(vs_wants * (((double) remaining_space) / total_wants));
+ vs_chunks += vs_additional;
+ remaining_space -= vs_additional;
+ gs_chunks += remaining_space;
+ }
+
+ /* Sanity check that we haven't over-allocated. */
+ assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
+
+ /* Finally, compute the number of entries that can fit in the space
+ * allocated to each stage.
+ */
+ unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
+ unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
+
+ /* Since we rounded up when computing *_wants, this may be slightly more
+ * than the maximum allowed amount, so correct for that.
+ */
+ nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
+ nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
+
+ /* Ensure that we program a multiple of the granularity. */
+ nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
+ nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
+
+ /* Finally, sanity check to make sure we have at least the minimum number
+ * of entries needed for each stage.
+ */
+ assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
+ if (gs_present)
+ assert(nr_gs_entries >= 2);
+
+ /* Lay out the URB in the following order:
+ * - push constants
+ * - VS
+ * - GS
+ */
+ pipeline->urb.vs_start = push_constant_chunks;
+ pipeline->urb.vs_size = vs_size;
+ pipeline->urb.nr_vs_entries = nr_vs_entries;
+
+ pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
+ pipeline->urb.gs_size = gs_size;
+ pipeline->urb.nr_gs_entries = nr_gs_entries;
+}
+
+static void
+anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
+ ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
+ struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+
+ pipeline->dynamic_state = default_dynamic_state;
+
+ if (pCreateInfo->pDynamicState) {
+ /* Remove all of the states that are marked as dynamic */
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t s = 0; s < count; s++)
+ states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
+ }
+
+ struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
+
+ dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+ if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+ typed_memcpy(dynamic->viewport.viewports,
+ pCreateInfo->pViewportState->pViewports,
+ pCreateInfo->pViewportState->viewportCount);
+ }
+
+ dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+ if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+ typed_memcpy(dynamic->scissor.scissors,
+ pCreateInfo->pViewportState->pScissors,
+ pCreateInfo->pViewportState->scissorCount);
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->depth_bias.bias =
+ pCreateInfo->pRasterizationState->depthBiasConstantFactor;
+ dynamic->depth_bias.clamp =
+ pCreateInfo->pRasterizationState->depthBiasClamp;
+ dynamic->depth_bias.slope =
+ pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
+ assert(pCreateInfo->pColorBlendState);
+ typed_memcpy(dynamic->blend_constants,
+ pCreateInfo->pColorBlendState->blendConstants, 4);
+ }
+
+ /* If there is no depthstencil attachment, then don't read
+ * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
+ * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
+ * no need to override the depthstencil defaults in
+ * anv_pipeline::dynamic_state when there is no depthstencil attachment.
+ *
+ * From the Vulkan spec (20 Oct 2015, git-aa308cb):
+ *
+ * pDepthStencilState [...] may only be NULL if renderPass and subpass
+ * specify a subpass that has no depth/stencil attachment.
+ */
+ if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
+ if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->depth_bounds.min =
+ pCreateInfo->pDepthStencilState->minDepthBounds;
+ dynamic->depth_bounds.max =
+ pCreateInfo->pDepthStencilState->maxDepthBounds;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->stencil_compare_mask.front =
+ pCreateInfo->pDepthStencilState->front.compareMask;
+ dynamic->stencil_compare_mask.back =
+ pCreateInfo->pDepthStencilState->back.compareMask;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->stencil_write_mask.front =
+ pCreateInfo->pDepthStencilState->front.writeMask;
+ dynamic->stencil_write_mask.back =
+ pCreateInfo->pDepthStencilState->back.writeMask;
+ }
+
+ if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
+ assert(pCreateInfo->pDepthStencilState);
+ dynamic->stencil_reference.front =
+ pCreateInfo->pDepthStencilState->front.reference;
+ dynamic->stencil_reference.back =
+ pCreateInfo->pDepthStencilState->back.reference;
+ }
+ }
+
+ pipeline->dynamic_state_mask = states;
+}
+
+static void
+anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
+{
+ struct anv_render_pass *renderpass = NULL;
+ struct anv_subpass *subpass = NULL;
+
+ /* Assert that all required members of VkGraphicsPipelineCreateInfo are
+ * present, as explained by the Vulkan spec (20 Oct 2015, git-aa308cb),
+ * Section 4.2 Graphics Pipeline.
+ */
+ assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
+
+ renderpass = anv_render_pass_from_handle(info->renderPass);
+ assert(renderpass);
+
+ if (renderpass != &anv_meta_dummy_renderpass) {
+ assert(info->subpass < renderpass->subpass_count);
+ subpass = &renderpass->subpasses[info->subpass];
+ }
+
+ assert(info->stageCount >= 1);
+ assert(info->pVertexInputState);
+ assert(info->pInputAssemblyState);
+ assert(info->pViewportState);
+ assert(info->pRasterizationState);
+
+ if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
+ assert(info->pDepthStencilState);
+
+ if (subpass && subpass->color_count > 0)
+ assert(info->pColorBlendState);
+
+ for (uint32_t i = 0; i < info->stageCount; ++i) {
+ switch (info->pStages[i].stage) {
+ case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+ case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+ assert(info->pTessellationState);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+VkResult
+anv_pipeline_init(struct anv_pipeline *pipeline,
+ struct anv_device *device,
+ struct anv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct anv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *alloc)
+{
+ VkResult result;
+
+ anv_validate {
+ anv_pipeline_validate_create_info(pCreateInfo);
+ }
+
+ if (alloc == NULL)
+ alloc = &device->alloc;
+
+ pipeline->device = device;
+ pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
+
+ result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
+ if (result != VK_SUCCESS)
+ return result;
+
+ pipeline->batch.alloc = alloc;
+ pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
+ pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
+ pipeline->batch.relocs = &pipeline->batch_relocs;
+
+ anv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
+
+ if (pCreateInfo->pTessellationState)
+ anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO");
+
+ pipeline->use_repclear = extra && extra->use_repclear;
+ pipeline->writes_point_size = false;
+
+ /* When we free the pipeline, we detect stages based on the NULL status
+ * of various prog_data pointers. Make them NULL by default.
+ */
+ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+ memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
+
+ pipeline->vs_simd8 = NO_KERNEL;
+ pipeline->vs_vec4 = NO_KERNEL;
+ pipeline->gs_kernel = NO_KERNEL;
+ pipeline->ps_ksp0 = NO_KERNEL;
+
+ pipeline->active_stages = 0;
+ pipeline->total_scratch = 0;
+
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ ANV_FROM_HANDLE(anv_shader_module, module,
+ pCreateInfo->pStages[i].module);
+
+ switch (pCreateInfo->pStages[i].stage) {
+ case VK_SHADER_STAGE_VERTEX_BIT:
+ anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module,
+ pCreateInfo->pStages[i].pName,
+ pCreateInfo->pStages[i].pSpecializationInfo);
+ break;
+ case VK_SHADER_STAGE_GEOMETRY_BIT:
+ anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module,
+ pCreateInfo->pStages[i].pName,
+ pCreateInfo->pStages[i].pSpecializationInfo);
+ break;
+ case VK_SHADER_STAGE_FRAGMENT_BIT:
+ anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module,
+ pCreateInfo->pStages[i].pName,
+ pCreateInfo->pStages[i].pSpecializationInfo);
+ break;
+ default:
+ anv_finishme("Unsupported shader stage");
+ }
+ }
+
+ if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
+ /* Vertex is only optional if disable_vs is set */
+ assert(extra->disable_vs);
+ memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
+ }
+
+ gen7_compute_urb_partition(pipeline);
+
+ const VkPipelineVertexInputStateCreateInfo *vi_info =
+ pCreateInfo->pVertexInputState;
+
+ uint64_t inputs_read;
+ if (extra && extra->disable_vs) {
+ /* If the VS is disabled, just assume the user knows what they're
+ * doing and apply the layout blindly. This can only come from
+ * meta, so this *should* be safe.
+ */
+ inputs_read = ~0ull;
+ } else {
+ inputs_read = pipeline->vs_prog_data.inputs_read;
+ }
+
+ pipeline->vb_used = 0;
+ for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
+ const VkVertexInputAttributeDescription *desc =
+ &vi_info->pVertexAttributeDescriptions[i];
+
+ if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location)))
+ pipeline->vb_used |= 1 << desc->binding;
+ }
+
+ for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *desc =
+ &vi_info->pVertexBindingDescriptions[i];
+
+ pipeline->binding_stride[desc->binding] = desc->stride;
+
+ /* Step rate is programmed per vertex element (attribute), not
+ * binding. Set up a map of which bindings step per instance, for
+ * reference by vertex element setup. */
+ switch (desc->inputRate) {
+ default:
+ case VK_VERTEX_INPUT_RATE_VERTEX:
+ pipeline->instancing_enable[desc->binding] = false;
+ break;
+ case VK_VERTEX_INPUT_RATE_INSTANCE:
+ pipeline->instancing_enable[desc->binding] = true;
+ break;
+ }
+ }
+
+ const VkPipelineInputAssemblyStateCreateInfo *ia_info =
+ pCreateInfo->pInputAssemblyState;
+ pipeline->primitive_restart = ia_info->primitiveRestartEnable;
+ pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
+
+ if (extra && extra->use_rectlist)
+ pipeline->topology = _3DPRIM_RECTLIST;
+
+ while (anv_block_pool_size(&device->scratch_block_pool) <
+ pipeline->total_scratch)
+ anv_block_pool_alloc(&device->scratch_block_pool);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_graphics_pipeline_create(
+ VkDevice _device,
+ VkPipelineCache _cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct anv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipeline)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+ if (cache == NULL)
+ cache = &device->default_pipeline_cache;
+
+ switch (device->info.gen) {
+ case 7:
+ if (device->info.is_haswell)
+ return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ else
+ return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ case 8:
+ return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ case 9:
+ return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+ default:
+ unreachable("unsupported gen\n");
+ }
+}
+
+VkResult anv_CreateGraphicsPipelines(
+ VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t count,
+ const VkGraphicsPipelineCreateInfo* pCreateInfos,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipeline* pPipelines)
+{
+ VkResult result = VK_SUCCESS;
+
+ unsigned i = 0;
+ for (; i < count; i++) {
+ result = anv_graphics_pipeline_create(_device,
+ pipelineCache,
+ &pCreateInfos[i],
+ NULL, pAllocator, &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ for (unsigned j = 0; j < i; j++) {
+ anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+ }
+
+ return result;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult anv_compute_pipeline_create(
+ VkDevice _device,
+ VkPipelineCache _cache,
+ const VkComputePipelineCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipeline* pPipeline)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+ if (cache == NULL)
+ cache = &device->default_pipeline_cache;
+
+ switch (device->info.gen) {
+ case 7:
+ if (device->info.is_haswell)
+ return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ else
+ return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ case 8:
+ return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ case 9:
+ return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+ default:
+ unreachable("unsupported gen\n");
+ }
+}
+
+VkResult anv_CreateComputePipelines(
+ VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t count,
+ const VkComputePipelineCreateInfo* pCreateInfos,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipeline* pPipelines)
+{
+ VkResult result = VK_SUCCESS;
+
+ unsigned i = 0;
+ for (; i < count; i++) {
+ result = anv_compute_pipeline_create(_device, pipelineCache,
+ &pCreateInfos[i],
+ pAllocator, &pPipelines[i]);
+ if (result != VK_SUCCESS) {
+ for (unsigned j = 0; j < i; j++) {
+ anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+ }
+
+ return result;
+ }
+ }
+
+ return VK_SUCCESS;
+}