llvmpipe: improve rasterization discard logic
author Roland Scheidegger <sroland@vmware.com>
Tue, 22 May 2018 00:12:38 +0000 (02:12 +0200)
committer Roland Scheidegger <sroland@vmware.com>
Wed, 23 May 2018 02:23:32 +0000 (04:23 +0200)
This unifies the explicit rasterization discard logic with the implicit
rasterization-disabled logic (which we need for another state tracker);
the two really should do the exact same thing.
With (implicit or explicit) discard, we'll now toss out the prims early
on in setup, rather than doing setup and binning with them, which was
entirely pointless.
(We should eventually get rid of implicit discard, which should also
enable us to discard stuff already in draw, hence draw would be
able to skip the pointless clip and fallback stages in this case.)
We still need separate logic for the null-ps-only case - this is not the
same as rasterization discard. But simplify the logic there and simply
don't count ps invocations when there's an empty fs, regardless of
depth/stencil tests, which seems perfectly acceptable by d3d10.
While here, also fix the primitive statistics when face culling is
enabled.
No piglit changes.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
15 files changed:
src/gallium/drivers/llvmpipe/lp_context.h
src/gallium/drivers/llvmpipe/lp_jit.c
src/gallium/drivers/llvmpipe/lp_jit.h
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_scene.c
src/gallium/drivers/llvmpipe/lp_scene.h
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_setup_line.c
src/gallium/drivers/llvmpipe/lp_setup_point.c
src/gallium/drivers/llvmpipe/lp_setup_tri.c
src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.c
src/gallium/drivers/llvmpipe/lp_state_fs.h

index 54d98fdbf7d96861af6e85668324c0828fdc0657..7a2f253984280f2b80c731c04f7ab1bd0b445b7f 100644 (file)
@@ -136,7 +136,6 @@ struct llvmpipe_context {
    struct blitter_context *blitter;
 
    unsigned tex_timestamp;
-   boolean no_rast;
 
    /** List of all fragment shader variants */
    struct lp_fs_variant_list_item fs_variants_list;
index a2762f39a0455e0b94a4c2dbccfafa4cede795cd..e2309f471573defbea7b9471852e7dbb2afb5385 100644 (file)
@@ -212,6 +212,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       elem_types[LP_JIT_THREAD_DATA_CACHE] =
             LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
       elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
+      elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc);
       elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
             LLVMInt32TypeInContext(lc);
 
index 9db26f2cba91d20b0651411c2319e79c34585694..312d1a1281db3cc527f0e4314761cd01a6f973ba 100644 (file)
@@ -192,6 +192,7 @@ struct lp_jit_thread_data
 {
    struct lp_build_format_cache *cache;
    uint64_t vis_counter;
+   uint64_t ps_invocations;
 
    /*
     * Non-interpolated rasterizer state passed through to the fragment shader.
@@ -205,6 +206,7 @@ struct lp_jit_thread_data
 enum {
    LP_JIT_THREAD_DATA_CACHE = 0,
    LP_JIT_THREAD_DATA_COUNTER,
+   LP_JIT_THREAD_DATA_INVOCATIONS,
    LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
    LP_JIT_THREAD_DATA_COUNT
 };
@@ -216,6 +218,9 @@ enum {
 #define lp_jit_thread_data_counter(_gallivm, _ptr) \
    lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter")
 
+#define lp_jit_thread_data_invocations(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_INVOCATIONS, "invocs")
+
 #define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \
    lp_build_struct_get(_gallivm, _ptr, \
                        LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \
index 939944aa791359e36fb0b5ff0e3d1de6d98f81a0..9d4f9f8d0276af5d151cc67a1f478e3fa6115861 100644 (file)
@@ -107,7 +107,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
                     task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
 
    task->thread_data.vis_counter = 0;
-   task->ps_invocations = 0;
+   task->thread_data.ps_invocations = 0;
 
    for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
       if (task->scene->fb.cbufs[i]) {
@@ -446,10 +446,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
-      /* not very accurate would need a popcount on the mask */
-      /* always count this not worth bothering? */
-      task->ps_invocations += 1 * variant->ps_inv_multiplier;
-
       /* Propagate non-interpolated raster state. */
       task->thread_data.raster_state.viewport_index = inputs->viewport_index;
 
@@ -491,7 +487,7 @@ lp_rast_begin_query(struct lp_rasterizer_task *task,
       pq->start[task->thread_index] = task->thread_data.vis_counter;
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
-      pq->start[task->thread_index] = task->ps_invocations;
+      pq->start[task->thread_index] = task->thread_data.ps_invocations;
       break;
    default:
       assert(0);
@@ -524,7 +520,7 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
       pq->end[task->thread_index] +=
-         task->ps_invocations - pq->start[task->thread_index];
+         task->thread_data.ps_invocations - pq->start[task->thread_index];
       pq->start[task->thread_index] = 0;
       break;
    default:
@@ -679,7 +675,7 @@ rasterize_scene(struct lp_rasterizer_task *task,
 #endif
 #endif
 
-   if (!task->rast->no_rast && !scene->discard) {
+   if (!task->rast->no_rast) {
       /* loop over scene bins, rasterize each */
       {
          struct cmd_bin *bin;
index fe078d5b8698a7b7bcb586abc651d634ec68f407..59d3a2d8c88cbac5b3ae658f8e77ca61a3241952 100644 (file)
@@ -99,8 +99,6 @@ struct lp_rasterizer_task
 
    /** Non-interpolated passthru state and occlude counter for visible pixels */
    struct lp_jit_thread_data thread_data;
-   uint64_t ps_invocations;
-   uint8_t ps_inv_multiplier;
 
    pipe_semaphore work_ready;
    pipe_semaphore work_done;
@@ -259,10 +257,6 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
-      /* not very accurate would need a popcount on the mask */
-      /* always count this not worth bothering? */
-      task->ps_invocations += 1 * variant->ps_inv_multiplier;
-
       /* Propagate non-interpolated raster state. */
       task->thread_data.raster_state.viewport_index = inputs->viewport_index;
 
index dfad9fabb20d35a2ad562525ad48dbf9f851b368..ef0136c4fd2eb40a2d65055c9e11dd27e5cfdff0 100644 (file)
@@ -507,15 +507,14 @@ end:
 }
 
 
-void lp_scene_begin_binning( struct lp_scene *scene,
-                             struct pipe_framebuffer_state *fb, boolean discard )
+void lp_scene_begin_binning(struct lp_scene *scene,
+                            struct pipe_framebuffer_state *fb)
 {
    int i;
    unsigned max_layer = ~0;
 
    assert(lp_scene_is_empty(scene));
 
-   scene->discard = discard;
    util_copy_framebuffer_state(&scene->fb, fb);
 
    scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
index da29057f1ef19f4f3b7543752b9edd32308c9c7f..b4ed8817ea7136ef718c82fe3740f4ce23d0b4b5 100644 (file)
@@ -166,7 +166,6 @@ struct lp_scene {
    unsigned resource_reference_size;
 
    boolean alloc_failed;
-   boolean discard;
    /**
     * Number of active tiles in each dimension.
     * This basically the framebuffer size divided by tile size
@@ -389,12 +388,11 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *x, int *y );
 /* Begin/end binning of a scene
  */
 void
-lp_scene_begin_binning( struct lp_scene *scene,
-                        struct pipe_framebuffer_state *fb,
-                        boolean discard );
+lp_scene_begin_binning(struct lp_scene *scene,
+                       struct pipe_framebuffer_state *fb);
 
 void
-lp_scene_end_binning( struct lp_scene *scene );
+lp_scene_end_binning(struct lp_scene *scene);
 
 
 /* Begin/end rasterization of a scene
@@ -403,7 +401,7 @@ void
 lp_scene_begin_rasterization(struct lp_scene *scene);
 
 void
-lp_scene_end_rasterization(struct lp_scene *scene );
+lp_scene_end_rasterization(struct lp_scene *scene);
 
 
 
index c1573231335c0bafbf7835ea6d1883b7cd763665..b08736947325a8e1241b63ba6f5a281cf6616f1d 100644 (file)
@@ -82,7 +82,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup)
       lp_fence_wait(setup->scene->fence);
    }
 
-   lp_scene_begin_binning(setup->scene, &setup->fb, setup->rasterizer_discard);
+   lp_scene_begin_binning(setup->scene, &setup->fb);
 
 }
 
@@ -724,25 +724,27 @@ lp_setup_set_scissors( struct lp_setup_context *setup,
 
 
 void 
-lp_setup_set_flatshade_first( struct lp_setup_context *setup,
-                              boolean flatshade_first )
+lp_setup_set_flatshade_first(struct lp_setup_context *setup,
+                             boolean flatshade_first)
 {
    setup->flatshade_first = flatshade_first;
 }
 
 void
-lp_setup_set_rasterizer_discard( struct lp_setup_context *setup,
-                                 boolean rasterizer_discard )
+lp_setup_set_rasterizer_discard(struct lp_setup_context *setup,
+                                boolean rasterizer_discard)
 {
    if (setup->rasterizer_discard != rasterizer_discard) {
       setup->rasterizer_discard = rasterizer_discard;
-      set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );
+      setup->line = first_line;
+      setup->point = first_point;
+      setup->triangle = first_triangle;
    }
 }
 
 void 
-lp_setup_set_vertex_info( struct lp_setup_context *setup,
-                          struct vertex_info *vertex_info )
+lp_setup_set_vertex_info(struct lp_setup_context *setup,
+                         struct vertex_info *vertex_info)
 {
    /* XXX: just silently holding onto the pointer:
     */
index d0bac5efb99fd4ff4120662a07e0937d6956ccb5..c1d8237a8acab9cbfb246e349163a7d94220116e 100644 (file)
@@ -616,8 +616,7 @@ try_setup_line( struct lp_setup_context *setup,
 
    LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries &&
-       !llvmpipe_rasterization_disabled(lp_context)) {
+   if (lp_context->active_statistics_queries) {
       lp_context->pipeline_statistics.c_primitives++;
    }
 
@@ -759,24 +758,33 @@ try_setup_line( struct lp_setup_context *setup,
 }
 
 
-static void lp_setup_line( struct lp_setup_context *setup,
-                           const float (*v0)[4],
-                           const float (*v1)[4] )
+static void lp_setup_line_discard(struct lp_setup_context *setup,
+                                  const float (*v0)[4],
+                                  const float (*v1)[4])
 {
-   if (!try_setup_line( setup, v0, v1 ))
-   {
+}
+
+static void lp_setup_line(struct lp_setup_context *setup,
+                          const float (*v0)[4],
+                          const float (*v1)[4])
+{
+   if (!try_setup_line(setup, v0, v1)) {
       if (!lp_setup_flush_and_restart(setup))
          return;
 
-      if (!try_setup_line( setup, v0, v1 ))
+      if (!try_setup_line(setup, v0, v1))
          return;
    }
 }
 
 
-void lp_setup_choose_line( struct lp_setup_context *setup ) 
+void lp_setup_choose_line(struct lp_setup_context *setup)
 { 
-   setup->line = lp_setup_line;
+   if (setup->rasterizer_discard) {
+      setup->line = lp_setup_line_discard;
+   } else {
+      setup->line = lp_setup_line;
+   }
 }
 
 
index 8cb6b83f9164e9313fd2d3102f4cd1afb79dd3d7..2192789bd4c347ae81159dc3dccf69fa06337cd1 100644 (file)
@@ -458,8 +458,7 @@ try_setup_point( struct lp_setup_context *setup,
 
    LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries &&
-       !llvmpipe_rasterization_disabled(lp_context)) {
+   if (lp_context->active_statistics_queries) {
       lp_context->pipeline_statistics.c_primitives++;
    }
 
@@ -518,24 +517,33 @@ try_setup_point( struct lp_setup_context *setup,
 
 
 static void 
+lp_setup_point_discard(struct lp_setup_context *setup,
+                       const float (*v0)[4])
+{
+}
+
+static void
 lp_setup_point(struct lp_setup_context *setup,
                const float (*v0)[4])
 {
-   if (!try_setup_point( setup, v0 ))
-   {
+   if (!try_setup_point(setup, v0)) {
       if (!lp_setup_flush_and_restart(setup))
          return;
 
-      if (!try_setup_point( setup, v0 ))
+      if (!try_setup_point(setup, v0))
          return;
    }
 }
 
 
 void 
-lp_setup_choose_point( struct lp_setup_context *setup )
+lp_setup_choose_point(struct lp_setup_context *setup)
 {
-   setup->point = lp_setup_point;
+   if (setup->rasterizer_discard) {
+      setup->point = lp_setup_point_discard;
+   } else {
+      setup->point = lp_setup_point;
+   }
 }
 
 
index 39755d6b581f7a9205edd99ba9092c47897f7d6e..cec6198ec63bc83d7a27c4c6ffa98405db317c90 100644 (file)
@@ -1127,6 +1127,11 @@ static void triangle_cw(struct lp_setup_context *setup,
                         const float (*v2)[4])
 {
    PIPE_ALIGN_VAR(16) struct fixed_position position;
+   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+
+   if (lp_context->active_statistics_queries) {
+      lp_context->pipeline_statistics.c_primitives++;
+   }
 
    calc_fixed_position(setup, &position, v0, v1, v2);
 
@@ -1148,6 +1153,11 @@ static void triangle_ccw(struct lp_setup_context *setup,
                          const float (*v2)[4])
 {
    PIPE_ALIGN_VAR(16) struct fixed_position position;
+   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+
+   if (lp_context->active_statistics_queries) {
+      lp_context->pipeline_statistics.c_primitives++;
+   }
 
    calc_fixed_position(setup, &position, v0, v1, v2);
 
@@ -1166,8 +1176,7 @@ static void triangle_both(struct lp_setup_context *setup,
    PIPE_ALIGN_VAR(16) struct fixed_position position;
    struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
 
-   if (lp_context->active_statistics_queries &&
-       !llvmpipe_rasterization_disabled(lp_context)) {
+   if (lp_context->active_statistics_queries) {
       lp_context->pipeline_statistics.c_primitives++;
    }
 
@@ -1196,17 +1205,21 @@ static void triangle_both(struct lp_setup_context *setup,
 }
 
 
-static void triangle_nop( struct lp_setup_context *setup,
-                         const float (*v0)[4],
-                         const float (*v1)[4],
-                         const float (*v2)[4] )
+static void triangle_noop(struct lp_setup_context *setup,
+                          const float (*v0)[4],
+                          const float (*v1)[4],
+                          const float (*v2)[4])
 {
 }
 
 
 void 
-lp_setup_choose_triangle( struct lp_setup_context *setup )
+lp_setup_choose_triangle(struct lp_setup_context *setup)
 {
+   if (setup->rasterizer_discard) {
+      setup->triangle = triangle_noop;
+      return;
+   }
    switch (setup->cullmode) {
    case PIPE_FACE_NONE:
       setup->triangle = triangle_both;
@@ -1218,7 +1231,7 @@ lp_setup_choose_triangle( struct lp_setup_context *setup )
       setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
       break;
    default:
-      setup->triangle = triangle_nop;
+      setup->triangle = triangle_noop;
       break;
    }
 }
index 28a48d488206eab2f50c3b8f5113b8ef5829f994..6675b20168b3b73e1b3f0a565136674edc2d269e 100644 (file)
@@ -571,7 +571,7 @@ lp_setup_pipeline_statistics(
       stats->gs_invocations;
    llvmpipe->pipeline_statistics.gs_primitives +=
       stats->gs_primitives;
-   if (!llvmpipe_rasterization_disabled(llvmpipe)) {
+   if (!setup->rasterizer_discard) {
       llvmpipe->pipeline_statistics.c_invocations +=
          stats->c_invocations;
    } else {
index 3e75d44dac6a657ab94bab8fb1c1866fcc851434..4bcca9072449ac77496bbb7f7646e7461dab6a3b 100644 (file)
@@ -207,13 +207,27 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
                           LP_NEW_SAMPLER |
                           LP_NEW_SAMPLER_VIEW |
                           LP_NEW_OCCLUSION_QUERY))
-      llvmpipe_update_fs( llvmpipe );
+      llvmpipe_update_fs(llvmpipe);
 
-   if (llvmpipe->dirty & (LP_NEW_RASTERIZER)) {
+   if (llvmpipe->dirty & (LP_NEW_FS |
+                          LP_NEW_FRAMEBUFFER |
+                          LP_NEW_RASTERIZER |
+                          LP_NEW_DEPTH_STENCIL_ALPHA)) {
+
+      /*
+       * Rasterization is disabled if there is no pixel shader and
+       * both depth and stencil testing are disabled:
+       * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
+       * FIXME: set rasterizer_discard in state tracker instead.
+       */
+      boolean null_fs = !llvmpipe->fs ||
+                        llvmpipe->fs->info.base.num_instructions <= 1;
       boolean discard =
          (llvmpipe->sample_mask & 1) == 0 ||
-         (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE);
-
+         (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE) ||
+         (null_fs &&
+          !llvmpipe->depth_stencil->depth.enabled &&
+          !llvmpipe->depth_stencil->stencil[0].enabled);
       lp_setup_set_rasterizer_discard(llvmpipe->setup, discard);
    }
 
index 91b68e7c96e7ab2106d67bfa44ce72485b2e9500..b7e16f92469782d540e7862281114283a37094c8 100644 (file)
@@ -2554,6 +2554,25 @@ generate_fragment(struct llvmpipe_context *lp,
    assert(builder);
    LLVMPositionBuilderAtEnd(builder, block);
 
+   /*
+    * Must not count ps invocations if there's a null shader.
+    * (It would be ok to count with null shader if there's d/s tests,
+    * but only if there's d/s buffers too, which is different
+    * to implicit rasterization disable which must not depend
+    * on the d/s buffers.)
+    * Could use popcount on mask, but pixel accuracy is not required.
+    * Could disable if there's no stats query, but maybe not worth it.
+    */
+   if (shader->info.base.num_instructions > 1) {
+      LLVMValueRef invocs, val;
+      invocs = lp_jit_thread_data_invocations(gallivm, thread_data_ptr);
+      val = LLVMBuildLoad(builder, invocs, "");
+      val = LLVMBuildAdd(builder, val,
+                         LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 1, 0),
+                         "invoc_count");
+      LLVMBuildStore(builder, val, invocs);
+   }
+
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->state);
 
@@ -2843,14 +2862,6 @@ generate_variant(struct llvmpipe_context *lp,
          !shader->info.base.writes_samplemask
       ? TRUE : FALSE;
 
-   /* if num_instructions == 1, it's a nop shader with only an END instruction */
-   if ((shader->info.base.num_instructions <= 1) &&
-       !key->depth.enabled && !key->stencil[0].enabled) {
-      variant->ps_inv_multiplier = 0;
-   } else {
-      variant->ps_inv_multiplier = 1;
-   }
-
    if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
       lp_debug_fs_variant(variant);
    }
@@ -3471,18 +3482,4 @@ llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
    llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
 }
 
-/*
- * Rasterization is disabled if there is no pixel shader and
- * both depth and stencil testing are disabled:
- * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
- */
-boolean
-llvmpipe_rasterization_disabled(struct llvmpipe_context *lp)
-{
-   /* if num_instructions == 1, it's a nop shader with only an END instruction */
-   boolean null_fs = !lp->fs || lp->fs->info.base.num_instructions <= 1;
 
-   return (null_fs &&
-           !lp->depth_stencil->depth.enabled &&
-           !lp->depth_stencil->stencil[0].enabled);
-}
index 2ddd8518834282d8f9e04d7bc2152d1458882cdd..28eccde17f8ce558c52a92f6417f816b1e37fcdc 100644 (file)
@@ -98,7 +98,6 @@ struct lp_fragment_shader_variant
    struct lp_fragment_shader_variant_key key;
 
    boolean opaque;
-   uint8_t ps_inv_multiplier;
 
    struct gallivm_state *gallivm;
 
@@ -150,8 +149,4 @@ void
 llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
                                struct lp_fragment_shader_variant *variant);
 
-boolean
-llvmpipe_rasterization_disabled(struct llvmpipe_context *lp);
-
-
 #endif /* LP_STATE_FS_H_ */