From 3967c7df4cbd096fce5858487cfbb38f95d04d06 Mon Sep 17 00:00:00 2001
From: kvark <kvark@mozilla.com>
Date: Tue, 15 Nov 2016 14:44:27 -0500
Subject: [PATCH] Mask generation in the off-screen cache textures with support
 for multiple rounded-corner rectangles. Removal of the _clip shader
 variants.

---
 replay/src/main.rs                            |   1 +
 sample/src/main.rs                            |   8 +-
 webrender/res/clip_shared.glsl                |  84 +++---
 webrender/res/cs_clip_clear.fs.glsl           |   7 +
 webrender/res/cs_clip_clear.vs.glsl           |  13 +
 webrender/res/cs_clip_image.fs.glsl           |  16 +
 ...rectangle_clip.glsl => cs_clip_image.glsl} |   7 +-
 webrender/res/cs_clip_image.vs.glsl           |  38 +++
 webrender/res/cs_clip_rectangle.fs.glsl       |  44 +++
 ...dient_clip.glsl => cs_clip_rectangle.glsl} |  12 +-
 webrender/res/cs_clip_rectangle.vs.glsl       |  77 +++++
 webrender/res/prim_shared.glsl                | 133 ++++-----
 webrender/res/ps_clear.fs.glsl                |   2 +-
 webrender/res/ps_gradient.fs.glsl             |   3 +-
 webrender/res/ps_gradient.vs.glsl             |   2 +
 webrender/res/ps_gradient_clip.fs.glsl        |  16 -
 webrender/res/ps_gradient_clip.vs.glsl        |  71 -----
 webrender/res/ps_image.fs.glsl                |   6 +-
 webrender/res/ps_image.vs.glsl                |   2 +
 webrender/res/ps_image_clip.fs.glsl           |  32 --
 webrender/res/ps_image_clip.glsl              |  17 --
 webrender/res/ps_image_clip.vs.glsl           |  38 ---
 webrender/res/ps_rectangle.fs.glsl            |   9 +-
 webrender/res/ps_rectangle.vs.glsl            |  10 +-
 webrender/res/ps_rectangle_clip.fs.glsl       |  15 -
 webrender/res/ps_rectangle_clip.vs.glsl       |  27 --
 webrender/res/ps_text_run.fs.glsl             |  13 +-
 webrender/res/ps_text_run.vs.glsl             |   2 +
 webrender/src/debug_render.rs                 |  38 +--
 webrender/src/device.rs                       |   6 +
 webrender/src/frame.rs                        |   4 +-
 webrender/src/gpu_store.rs                    |   2 +-
 webrender/src/internal_types.rs               |   4 +-
 webrender/src/lib.rs                          |   1 +
 webrender/src/mask_cache.rs                   | 141 +++++++++
 webrender/src/prim_store.rs                   | 136 +++------
 webrender/src/render_backend.rs               |   6 +-
 webrender/src/renderer.rs                     | 215 ++++++++------
 webrender/src/resource_cache.rs               |  14 +-
 webrender/src/tiling.rs                       | 281 +++++++++++++-----
 webrender_traits/src/types.rs                 |   2 +-
 41 files changed, 876 insertions(+), 679 deletions(-)
 create mode 100644 webrender/res/cs_clip_clear.fs.glsl
 create mode 100644 webrender/res/cs_clip_clear.vs.glsl
 create mode 100644 webrender/res/cs_clip_image.fs.glsl
 rename webrender/res/{ps_rectangle_clip.glsl => cs_clip_image.glsl} (76%)
 create mode 100644 webrender/res/cs_clip_image.vs.glsl
 create mode 100644 webrender/res/cs_clip_rectangle.fs.glsl
 rename webrender/res/{ps_gradient_clip.glsl => cs_clip_rectangle.glsl} (69%)
 create mode 100644 webrender/res/cs_clip_rectangle.vs.glsl
 delete mode 100644 webrender/res/ps_gradient_clip.fs.glsl
 delete mode 100644 webrender/res/ps_gradient_clip.vs.glsl
 delete mode 100644 webrender/res/ps_image_clip.fs.glsl
 delete mode 100644 webrender/res/ps_image_clip.glsl
 delete mode 100644 webrender/res/ps_image_clip.vs.glsl
 delete mode 100644 webrender/res/ps_rectangle_clip.fs.glsl
 delete mode 100644 webrender/res/ps_rectangle_clip.vs.glsl
 create mode 100644 webrender/src/mask_cache.rs

diff --git a/replay/src/main.rs b/replay/src/main.rs
index 7628811016..542296b268 100644
--- a/replay/src/main.rs
+++ b/replay/src/main.rs
@@ -79,6 +79,7 @@ fn main() {
     let resource_path = &args[1];
     let ref dir = args[2];
     let window = glutin::WindowBuilder::new()
+        .with_title("WebRender Replay")
         .with_gl(glutin::GlRequest::Specific(glutin::Api::OpenGl, (3,2)))
         .build()
         .unwrap();
diff --git a/sample/src/main.rs b/sample/src/main.rs
index 24d18c40db..914c9b7df2 100644
--- a/sample/src/main.rs
+++ b/sample/src/main.rs
@@ -63,6 +63,7 @@ fn main() {
     let res_path = &args[1];
 
     let window = glutin::WindowBuilder::new()
+                .with_title("WebRender Sample")
                 .with_gl(glutin::GlRequest::Specific(glutin::Api::OpenGl, (3, 2)))
                 .build()
                 .unwrap();
@@ -133,14 +134,15 @@ fn main() {
                                                &mut auxiliary_lists_builder));
 
     let clip_region = {
-        let rect = Rect::new(Point2D::new(100.0, 100.0), Size2D::new(100.0, 100.0));
         let mask = webrender_traits::ImageMask {
             image: api.add_image(2, 2, None, ImageFormat::A8, vec![0,80, 180, 255]),
-            rect: rect,
+            rect: Rect::new(Point2D::new(75.0, 75.0), Size2D::new(100.0, 100.0)),
             repeat: false,
         };
         let radius = webrender_traits::BorderRadius::uniform(20.0);
-        let complex = webrender_traits::ComplexClipRegion::new(rect, radius);
+        let complex = webrender_traits::ComplexClipRegion::new(
+            Rect::new(Point2D::new(50.0, 50.0), Size2D::new(100.0, 100.0)),
+            radius);
 
         webrender_traits::ClipRegion::new(&bounds,
                                           vec![complex],
diff --git a/webrender/res/clip_shared.glsl b/webrender/res/clip_shared.glsl
index 62e51c3bbf..728ce8dea8 100644
--- a/webrender/res/clip_shared.glsl
+++ b/webrender/res/clip_shared.glsl
@@ -3,63 +3,49 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-flat varying vec4 vClipRect;
-flat varying vec4 vClipRadius;
-flat varying vec4 vClipMaskUvRect;
-flat varying vec4 vClipMaskLocalRect;
-
 #ifdef WR_VERTEX_SHADER
-void write_clip(ClipData clip) {
-    vClipRect = vec4(clip.rect.rect.xy, clip.rect.rect.xy + clip.rect.rect.zw);
-    vClipRadius = vec4(clip.top_left.outer_inner_radius.x,
-                       clip.top_right.outer_inner_radius.x,
-                       clip.bottom_right.outer_inner_radius.x,
-                       clip.bottom_left.outer_inner_radius.x);
-    //TODO: interpolate the final mask UV
-    vec2 texture_size = textureSize(sMask, 0);
-    vClipMaskUvRect = clip.mask_data.uv_rect / texture_size.xyxy;
-    vClipMaskLocalRect = clip.mask_data.local_rect; //TODO: transform
-}
-#endif
 
-#ifdef WR_FRAGMENT_SHADER
-float do_clip(vec2 pos) {
-    vec2 ref_tl = vClipRect.xy + vec2( vClipRadius.x,  vClipRadius.x);
-    vec2 ref_tr = vClipRect.zy + vec2(-vClipRadius.y,  vClipRadius.y);
-    vec2 ref_br = vClipRect.zw + vec2(-vClipRadius.z, -vClipRadius.z);
-    vec2 ref_bl = vClipRect.xw + vec2( vClipRadius.w, -vClipRadius.w);
+struct CacheClipInstance {
+    int render_task_index;
+    int layer_index;
+    int data_index;
+};
 
-    float d_tl = distance(pos, ref_tl);
-    float d_tr = distance(pos, ref_tr);
-    float d_br = distance(pos, ref_br);
-    float d_bl = distance(pos, ref_bl);
+CacheClipInstance fetch_clip_item(int index) {
+    CacheClipInstance cci;
 
-    float pixels_per_fragment = length(fwidth(pos.xy));
-    float nudge = 0.5 * pixels_per_fragment;
-    vec4 distances = vec4(d_tl, d_tr, d_br, d_bl) - vClipRadius + nudge;
+    int offset = index * 1;
 
-    bvec4 is_out = bvec4(pos.x < ref_tl.x && pos.y < ref_tl.y,
-                         pos.x > ref_tr.x && pos.y < ref_tr.y,
-                         pos.x > ref_br.x && pos.y > ref_br.y,
-                         pos.x < ref_bl.x && pos.y > ref_bl.y);
+    ivec4 data0 = int_data[offset + 0];
 
-    float distance_from_border = dot(vec4(is_out),
-                                     max(vec4(0.0, 0.0, 0.0, 0.0), distances));
+    cci.render_task_index = data0.x;
+    cci.layer_index = data0.y;
+    cci.data_index = data0.z;
+
+    return cci;
+}
 
-    // Move the distance back into pixels.
-    distance_from_border /= pixels_per_fragment;
-    // Apply a more gradual fade out to transparent.
-    //distance_from_border -= 0.5;
+// The transformed vertex function that always covers the whole clip area,
+// which is the intersection of all clip instances of a given primitive
+TransformVertexInfo write_clip_tile_vertex(vec4 local_clip_rect,
+                                           Layer layer,
+                                           ClipArea area) {
+    vec2 lp0_base = local_clip_rect.xy;
+    vec2 lp1_base = local_clip_rect.xy + local_clip_rect.zw;
 
-    float border_alpha = 1.0 - smoothstep(0.0, 1.0, distance_from_border);
+    vec2 lp0 = clamp_rect(lp0_base, layer.local_clip_rect);
+    vec2 lp1 = clamp_rect(lp1_base, layer.local_clip_rect);
+    vec4 clipped_local_rect = vec4(lp0, lp1 - lp0);
 
-    bool repeat_mask = false; //TODO
-    vec2 vMaskUv = (pos - vClipMaskLocalRect.xy) / vClipMaskLocalRect.zw;
-    vec2 clamped_mask_uv = repeat_mask ? fract(vMaskUv) :
-        clamp(vMaskUv, vec2(0.0, 0.0), vec2(1.0, 1.0));
-    vec2 source_uv = clamped_mask_uv * vClipMaskUvRect.zw + vClipMaskUvRect.xy;
-    float mask_alpha = texture(sMask, source_uv).r; //careful: texture has type A8
+    vec2 final_pos = mix(area.task_bounds.xy, area.task_bounds.zw, aPosition.xy);
 
-    return border_alpha * mask_alpha;
+    // compute the point position inside the layer, in CSS space
+    vec2 clamped_pos = final_pos + area.screen_origin_target_index.xy - area.task_bounds.xy;
+    vec4 layer_pos = get_layer_pos(clamped_pos / uDevicePixelRatio, layer);
+
+    gl_Position = uTransform * vec4(final_pos, 0.0, 1);
+
+    return TransformVertexInfo(layer_pos.xyw, clamped_pos, clipped_local_rect);
 }
-#endif
+
+#endif //WR_VERTEX_SHADER
diff --git a/webrender/res/cs_clip_clear.fs.glsl b/webrender/res/cs_clip_clear.fs.glsl
new file mode 100644
index 0000000000..bbdd2e6457
--- /dev/null
+++ b/webrender/res/cs_clip_clear.fs.glsl
@@ -0,0 +1,7 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+void main(void) {
+    oFragColor = vec4(1.0, 1.0, 1.0, 1.0);
+}
diff --git a/webrender/res/cs_clip_clear.vs.glsl b/webrender/res/cs_clip_clear.vs.glsl
new file mode 100644
index 0000000000..f4c85773c3
--- /dev/null
+++ b/webrender/res/cs_clip_clear.vs.glsl
@@ -0,0 +1,13 @@
+#line 1
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+void main(void) {
+    CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
+    ClipArea area = fetch_clip_area(cci.render_task_index);
+
+    vec2 final_pos = mix(area.task_bounds.xy, area.task_bounds.zw, aPosition.xy);
+
+    gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
+}
diff --git a/webrender/res/cs_clip_image.fs.glsl b/webrender/res/cs_clip_image.fs.glsl
new file mode 100644
index 0000000000..b93656d3d5
--- /dev/null
+++ b/webrender/res/cs_clip_image.fs.glsl
@@ -0,0 +1,16 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+void main(void) {
+    float alpha = 1.f;
+    vec2 local_pos = init_transform_fs(vPos, vLocalRect, alpha);
+
+    bool repeat_mask = false; //TODO
+    vec2 clamped_mask_uv = repeat_mask ? fract(vClipMaskUv.xy) :
+        clamp(vClipMaskUv.xy, vec2(0.0, 0.0), vec2(1.0, 1.0));
+    vec2 source_uv = clamped_mask_uv * vClipMaskUvRect.zw + vClipMaskUvRect.xy;
+    float clip_alpha = texture(sMask, source_uv).r; //careful: texture has type A8
+
+    oFragColor = vec4(1.0, 1.0, 1.0, min(alpha, clip_alpha));
+}
diff --git a/webrender/res/ps_rectangle_clip.glsl b/webrender/res/cs_clip_image.glsl
similarity index 76%
rename from webrender/res/ps_rectangle_clip.glsl
rename to webrender/res/cs_clip_image.glsl
index abd4a6cc5e..e5fe1f8623 100644
--- a/webrender/res/ps_rectangle_clip.glsl
+++ b/webrender/res/cs_clip_image.glsl
@@ -4,11 +4,6 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-varying vec4 vColor;
-
-#ifdef WR_FEATURE_TRANSFORM
 varying vec3 vPos;
 flat varying vec4 vLocalRect;
-#else
-varying vec2 vPos;
-#endif
+flat varying vec4 vClipMaskUvRect;
diff --git a/webrender/res/cs_clip_image.vs.glsl b/webrender/res/cs_clip_image.vs.glsl
new file mode 100644
index 0000000000..4c9a96b637
--- /dev/null
+++ b/webrender/res/cs_clip_image.vs.glsl
@@ -0,0 +1,38 @@
+#line 1
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+struct ImageMaskData {
+    vec4 uv_rect;
+    vec4 local_rect;
+};
+
+ImageMaskData fetch_mask_data(int index) {
+    ImageMaskData info;
+
+    ivec2 uv = get_fetch_uv_2(index);
+
+    info.uv_rect = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
+    info.local_rect = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
+
+    return info;
+}
+
+void main(void) {
+    CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
+    ClipArea area = fetch_clip_area(cci.render_task_index);
+    Layer layer = fetch_layer(cci.layer_index);
+    ImageMaskData mask = fetch_mask_data(cci.data_index);
+    vec4 local_rect = mask.local_rect;
+
+    TransformVertexInfo vi = write_clip_tile_vertex(local_rect,
+                                                    layer,
+                                                    area);
+    vLocalRect = vi.clipped_local_rect;
+    vPos = vi.local_pos;
+
+    vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.xy) / local_rect.zw, 0.0);
+    vec2 texture_size = textureSize(sMask, 0);
+    vClipMaskUvRect = mask.uv_rect / texture_size.xyxy;
+}
diff --git a/webrender/res/cs_clip_rectangle.fs.glsl b/webrender/res/cs_clip_rectangle.fs.glsl
new file mode 100644
index 0000000000..24c4469113
--- /dev/null
+++ b/webrender/res/cs_clip_rectangle.fs.glsl
@@ -0,0 +1,44 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+float rounded_rect(vec2 pos) {
+    vec2 ref_tl = vClipRect.xy + vec2( vClipRadius.x,  vClipRadius.x);
+    vec2 ref_tr = vClipRect.zy + vec2(-vClipRadius.y,  vClipRadius.y);
+    vec2 ref_br = vClipRect.zw + vec2(-vClipRadius.z, -vClipRadius.z);
+    vec2 ref_bl = vClipRect.xw + vec2( vClipRadius.w, -vClipRadius.w);
+
+    float d_tl = distance(pos, ref_tl);
+    float d_tr = distance(pos, ref_tr);
+    float d_br = distance(pos, ref_br);
+    float d_bl = distance(pos, ref_bl);
+
+    float pixels_per_fragment = length(fwidth(pos.xy));
+    float nudge = 0.5 * pixels_per_fragment;
+    vec4 distances = vec4(d_tl, d_tr, d_br, d_bl) - vClipRadius + nudge;
+
+    bvec4 is_out = bvec4(pos.x < ref_tl.x && pos.y < ref_tl.y,
+                         pos.x > ref_tr.x && pos.y < ref_tr.y,
+                         pos.x > ref_br.x && pos.y > ref_br.y,
+                         pos.x < ref_bl.x && pos.y > ref_bl.y);
+
+    float distance_from_border = dot(vec4(is_out),
+                                     max(vec4(0.0, 0.0, 0.0, 0.0), distances));
+
+    // Move the distance back into pixels.
+    distance_from_border /= pixels_per_fragment;
+    // Apply a more gradual fade out to transparent.
+    //distance_from_border -= 0.5;
+
+    return 1.0 - smoothstep(0.0, 1.0, distance_from_border);
+}
+
+
+void main(void) {
+    float alpha = 1.f;
+    vec2 local_pos = init_transform_fs(vPos, vLocalRect, alpha);
+
+    float clip_alpha = rounded_rect(local_pos);
+
+    oFragColor = vec4(1.0, 1.0, 1.0, min(alpha, clip_alpha));
+}
diff --git a/webrender/res/ps_gradient_clip.glsl b/webrender/res/cs_clip_rectangle.glsl
similarity index 69%
rename from webrender/res/ps_gradient_clip.glsl
rename to webrender/res/cs_clip_rectangle.glsl
index e1d1845363..9e73553cc8 100644
--- a/webrender/res/ps_gradient_clip.glsl
+++ b/webrender/res/cs_clip_rectangle.glsl
@@ -1,12 +1,10 @@
+#line 1
+
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-varying vec4 vColor;
-
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
+varying vec3 vPos;
 flat varying vec4 vLocalRect;
-#else
-varying vec2 vPos;
-#endif
+flat varying vec4 vClipRect;
+flat varying vec4 vClipRadius;
diff --git a/webrender/res/cs_clip_rectangle.vs.glsl b/webrender/res/cs_clip_rectangle.vs.glsl
new file mode 100644
index 0000000000..e81ab59e33
--- /dev/null
+++ b/webrender/res/cs_clip_rectangle.vs.glsl
@@ -0,0 +1,77 @@
+#line 1
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+struct ClipRect {
+    vec4 rect;
+    vec4 dummy;
+};
+
+ClipRect fetch_clip_rect(int index) {
+    ClipRect rect;
+
+    ivec2 uv = get_fetch_uv_2(index);
+
+    rect.rect = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
+    //rect.dummy = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
+    rect.dummy = vec4(0.0, 0.0, 0.0, 0.0);
+
+    return rect;
+}
+
+struct ClipCorner {
+    vec4 rect;
+    vec4 outer_inner_radius;
+};
+
+ClipCorner fetch_clip_corner(int index) {
+    ClipCorner corner;
+
+    ivec2 uv = get_fetch_uv_2(index);
+
+    corner.rect = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
+    corner.outer_inner_radius = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
+
+    return corner;
+}
+
+struct ClipData {
+    ClipRect rect;
+    ClipCorner top_left;
+    ClipCorner top_right;
+    ClipCorner bottom_left;
+    ClipCorner bottom_right;
+};
+
+ClipData fetch_clip(int index) {
+    ClipData clip;
+
+    clip.rect = fetch_clip_rect(index + 0);
+    clip.top_left = fetch_clip_corner(index + 1);
+    clip.top_right = fetch_clip_corner(index + 2);
+    clip.bottom_left = fetch_clip_corner(index + 3);
+    clip.bottom_right = fetch_clip_corner(index + 4);
+
+    return clip;
+}
+
+void main(void) {
+    CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
+    ClipArea area = fetch_clip_area(cci.render_task_index);
+    Layer layer = fetch_layer(cci.layer_index);
+    ClipData clip = fetch_clip(cci.data_index);
+    vec4 local_rect = clip.rect.rect;
+
+    TransformVertexInfo vi = write_clip_tile_vertex(local_rect,
+                                                    layer,
+                                                    area);
+    vLocalRect = vi.clipped_local_rect;
+    vPos = vi.local_pos;
+
+    vClipRect = vec4(local_rect.xy, local_rect.xy + local_rect.zw);
+    vClipRadius = vec4(clip.top_left.outer_inner_radius.x,
+                       clip.top_right.outer_inner_radius.x,
+                       clip.bottom_right.outer_inner_radius.x,
+                       clip.bottom_left.outer_inner_radius.x);
+}
diff --git a/webrender/res/prim_shared.glsl b/webrender/res/prim_shared.glsl
index b98d9d2b72..060216e8d9 100644
--- a/webrender/res/prim_shared.glsl
+++ b/webrender/res/prim_shared.glsl
@@ -36,6 +36,9 @@
 
 uniform sampler2DArray sCache;
 
+flat varying vec4 vClipMaskUvBounds;
+varying vec3 vClipMaskUv;
+
 #ifdef WR_VERTEX_SHADER
 
 #define VECS_PER_LAYER             13
@@ -49,7 +52,6 @@ uniform sampler2DArray sCache;
 uniform sampler2D sLayers;
 uniform sampler2D sRenderTasks;
 uniform sampler2D sPrimGeometry;
-uniform sampler2D sClips;
 
 uniform sampler2D sData16;
 uniform sampler2D sData32;
@@ -152,6 +154,26 @@ Tile fetch_tile(int index) {
     return tile;
 }
 
+struct ClipArea {
+    vec4 task_bounds;
+    vec4 screen_origin_target_index;
+};
+
+ClipArea fetch_clip_area(int index) {
+    ClipArea area;
+
+    if (index == 0x7FFFFFFF) { //special sentinel task index
+        area.task_bounds = vec4(0.0, 0.0, 0.0, 0.0);
+        area.screen_origin_target_index = vec4(0.0, 0.0, 0.0, 0.0);
+    } else {
+        RenderTaskData task = fetch_render_task(index);
+        area.task_bounds = task.data0;
+        area.screen_origin_target_index = task.data1;
+    }
+
+    return area;
+}
+
 struct Gradient {
     vec4 start_end_point;
     vec4 kind;
@@ -252,8 +274,8 @@ struct PrimitiveInstance {
     int global_prim_index;
     int specific_prim_index;
     int render_task_index;
+    int clip_task_index;
     int layer_index;
-    int clip_address;
     int sub_index;
     ivec2 user_data;
 };
@@ -269,8 +291,8 @@ PrimitiveInstance fetch_instance(int index) {
     pi.global_prim_index = data0.x;
     pi.specific_prim_index = data0.y;
     pi.render_task_index = data0.z;
-    pi.layer_index = data0.w;
-    pi.clip_address = data1.x;
+    pi.clip_task_index = data0.w;
+    pi.layer_index = data1.x;
     pi.sub_index = data1.y;
     pi.user_data = data1.zw;
 
@@ -322,10 +344,10 @@ CachePrimitiveInstance fetch_cache_instance(int index) {
 struct Primitive {
     Layer layer;
     Tile tile;
+    ClipArea clip_area;
     vec4 local_rect;
     vec4 local_clip_rect;
     int prim_index;
-    int clip_index;
     // when sending multiple primitives of the same type (e.g. border segments)
     // this index allows the vertex shader to recognize the difference
     int sub_index;
@@ -339,90 +361,19 @@ Primitive load_primitive(int index) {
 
     prim.layer = fetch_layer(pi.layer_index);
     prim.tile = fetch_tile(pi.render_task_index);
+    prim.clip_area = fetch_clip_area(pi.clip_task_index);
 
     PrimitiveGeometry pg = fetch_prim_geometry(pi.global_prim_index);
     prim.local_rect = pg.local_rect;
     prim.local_clip_rect = pg.local_clip_rect;
 
     prim.prim_index = pi.specific_prim_index;
-    prim.clip_index = pi.clip_address;
     prim.sub_index = pi.sub_index;
     prim.user_data = pi.user_data;
 
     return prim;
 }
 
-struct ClipRect {
-    vec4 rect;
-    vec4 dummy;
-};
-
-ClipRect fetch_clip_rect(int index) {
-    ClipRect rect;
-
-    ivec2 uv = get_fetch_uv_2(index);
-
-    rect.rect = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
-    //rect.dummy = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
-    rect.dummy = vec4(0.0, 0.0, 0.0, 0.0);
-
-    return rect;
-}
-
-struct ImageMaskData {
-    vec4 uv_rect;
-    vec4 local_rect;
-};
-
-ImageMaskData fetch_mask_data(int index) {
-    ImageMaskData info;
-
-    ivec2 uv = get_fetch_uv_2(index);
-
-    info.uv_rect = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
-    info.local_rect = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
-
-    return info;
-}
-
-struct ClipCorner {
-    vec4 rect;
-    vec4 outer_inner_radius;
-};
-
-ClipCorner fetch_clip_corner(int index) {
-    ClipCorner corner;
-
-    ivec2 uv = get_fetch_uv_2(index);
-
-    corner.rect = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
-    corner.outer_inner_radius = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
-
-    return corner;
-}
-
-struct ClipData {
-    ClipRect rect;
-    ClipCorner top_left;
-    ClipCorner top_right;
-    ClipCorner bottom_left;
-    ClipCorner bottom_right;
-    ImageMaskData mask_data;
-};
-
-ClipData fetch_clip(int index) {
-    ClipData clip;
-
-    clip.rect = fetch_clip_rect(index + 0);
-    clip.top_left = fetch_clip_corner(index + 1);
-    clip.top_right = fetch_clip_corner(index + 2);
-    clip.bottom_left = fetch_clip_corner(index + 3);
-    clip.bottom_right = fetch_clip_corner(index + 4);
-    clip.mask_data = fetch_mask_data(index + 5);
-
-    return clip;
-}
-
 // Return the intersection of the plane (set up by "normal" and "point")
 // with the ray (set up by "ray_origin" and "ray_dir"),
 // writing the resulting scaler into "t".
@@ -497,7 +448,7 @@ VertexInfo write_vertex(vec4 instance_rect,
 
     local_pos = clamp_rect(local_pos, layer.local_clip_rect);
 
-    vec4 world_pos = layer.transform * vec4(local_pos, 0, 1);
+    vec4 world_pos = layer.transform * vec4(local_pos, 0.0, 1.0);
     world_pos.xyz /= world_pos.w;
 
     vec2 device_pos = world_pos.xy * uDevicePixelRatio;
@@ -521,6 +472,7 @@ VertexInfo write_vertex(vec4 instance_rect,
 
 struct TransformVertexInfo {
     vec3 local_pos;
+    vec2 global_clamped_pos;
     vec4 clipped_local_rect;
 };
 
@@ -577,9 +529,9 @@ TransformVertexInfo write_transform_vertex(vec4 instance_rect,
     // apply the task offset
     vec2 final_pos = clamped_pos + tile.screen_origin_task_origin.zw - tile.screen_origin_task_origin.xy;
 
-    gl_Position = uTransform * vec4(final_pos, 0, 1);
+    gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
 
-    return TransformVertexInfo(layer_pos.xyw, clipped_local_rect);
+    return TransformVertexInfo(layer_pos.xyw, clamped_pos, clipped_local_rect);
 }
 
 #endif //WR_FEATURE_TRANSFORM
@@ -675,7 +627,14 @@ Composite fetch_composite(int index) {
 
     return composite;
 }
-#endif
+
+void write_clip(vec2 global_pos, ClipArea area) {
+    vec2 texture_size = textureSize(sCache, 0).xy;
+    vec2 uv = global_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
+    vClipMaskUvBounds = area.task_bounds / texture_size.xyxy;
+    vClipMaskUv = vec3(uv / texture_size, area.screen_origin_target_index.z);
+}
+#endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 float distance_from_rect(vec2 p, vec2 origin, vec2 size) {
@@ -695,4 +654,14 @@ vec2 init_transform_fs(vec3 local_pos, vec4 local_rect, out float fragment_alpha
 
     return pos;
 }
-#endif
+
+float do_clip() {
+    // anything outside of the mask is considered transparent
+    bvec4 inside = lessThanEqual(
+        vec4(vClipMaskUvBounds.xy, vClipMaskUv.xy),
+        vec4(vClipMaskUv.xy, vClipMaskUvBounds.zw));
+    // check for the dummy bounds, which are given to the opaque objects
+    return vClipMaskUvBounds.xy == vClipMaskUvBounds.zw ? 1.0:
+        all(inside) ? textureLod(sCache, vClipMaskUv, 0).a : 0.0;
+}
+#endif //WR_FRAGMENT_SHADER
diff --git a/webrender/res/ps_clear.fs.glsl b/webrender/res/ps_clear.fs.glsl
index 5ad3065f78..bbdd2e6457 100644
--- a/webrender/res/ps_clear.fs.glsl
+++ b/webrender/res/ps_clear.fs.glsl
@@ -3,5 +3,5 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
-    oFragColor = vec4(1, 1, 1, 1);
+    oFragColor = vec4(1.0, 1.0, 1.0, 1.0);
 }
diff --git a/webrender/res/ps_gradient.fs.glsl b/webrender/res/ps_gradient.fs.glsl
index 512d3d1db6..d2c705af49 100644
--- a/webrender/res/ps_gradient.fs.glsl
+++ b/webrender/res/ps_gradient.fs.glsl
@@ -11,5 +11,6 @@ void main(void) {
     vec2 local_pos = vPos;
 #endif
 
-    oFragColor = vColor * vec4(1, 1, 1, alpha);
+    alpha = min(alpha, do_clip());
+    oFragColor = vColor * vec4(1.0, 1.0, 1.0, alpha);
 }
diff --git a/webrender/res/ps_gradient.vs.glsl b/webrender/res/ps_gradient.vs.glsl
index a5e6bb0f3d..37d914a21d 100644
--- a/webrender/res/ps_gradient.vs.glsl
+++ b/webrender/res/ps_gradient.vs.glsl
@@ -54,6 +54,8 @@ void main(void) {
     vPos = vi.local_clamped_pos;
 #endif
 
+    write_clip(vi.global_clamped_pos, prim.clip_area);
+
     switch (int(gradient.kind.x)) {
         case GRADIENT_HORIZONTAL:
             vColor = mix(g0.color, g1.color, f.x);
diff --git a/webrender/res/ps_gradient_clip.fs.glsl b/webrender/res/ps_gradient_clip.fs.glsl
deleted file mode 100644
index c99c6e55d6..0000000000
--- a/webrender/res/ps_gradient_clip.fs.glsl
+++ /dev/null
@@ -1,16 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-#ifdef WR_FEATURE_TRANSFORM
-    float alpha = 0.0;
-    vec2 local_pos = init_transform_fs(vLocalPos, vLocalRect, alpha);
-#else
-    float alpha = 1.0;
-    vec2 local_pos = vPos;
-#endif
-
-    alpha = min(alpha, do_clip(local_pos));
-    oFragColor = vColor * vec4(1, 1, 1, alpha);
-}
diff --git a/webrender/res/ps_gradient_clip.vs.glsl b/webrender/res/ps_gradient_clip.vs.glsl
deleted file mode 100644
index 8c20dfdbfa..0000000000
--- a/webrender/res/ps_gradient_clip.vs.glsl
+++ /dev/null
@@ -1,71 +0,0 @@
-#line 1
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    Primitive prim = load_primitive(gl_InstanceID);
-    Gradient gradient = fetch_gradient(prim.prim_index);
-
-    GradientStop g0 = fetch_gradient_stop(prim.sub_index + 0);
-    GradientStop g1 = fetch_gradient_stop(prim.sub_index + 1);
-
-    vec4 segment_rect;
-    switch (int(gradient.kind.x)) {
-        case GRADIENT_HORIZONTAL:
-            float x0 = mix(gradient.start_end_point.x,
-                           gradient.start_end_point.z,
-                           g0.offset.x);
-            float x1 = mix(gradient.start_end_point.x,
-                           gradient.start_end_point.z,
-                           g1.offset.x);
-            segment_rect.yw = prim.local_rect.yw;
-            segment_rect.x = x0;
-            segment_rect.z = x1 - x0;
-            break;
-        case GRADIENT_VERTICAL:
-            float y0 = mix(gradient.start_end_point.y,
-                           gradient.start_end_point.w,
-                           g0.offset.x);
-            float y1 = mix(gradient.start_end_point.y,
-                           gradient.start_end_point.w,
-                           g1.offset.x);
-            segment_rect.xz = prim.local_rect.xz;
-            segment_rect.y = y0;
-            segment_rect.w = y1 - y0;
-            break;
-    }
-
-#ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(segment_rect,
-                                                    prim.local_clip_rect,
-                                                    prim.layer,
-                                                    prim.tile);
-    vLocalRect = vi.clipped_local_rect;
-    vLocalPos = vi.local_pos;
-    vec2 f = (vi.local_pos.xy - prim.local_rect.xy) / prim.local_rect.zw;
-#else
-    VertexInfo vi = write_vertex(segment_rect,
-                                 prim.local_clip_rect,
-                                 prim.layer,
-                                 prim.tile);
-
-    vec2 f = (vi.local_clamped_pos - segment_rect.xy) / segment_rect.zw;
-    vPos = vi.local_clamped_pos;
-#endif
-
-    switch (int(gradient.kind.x)) {
-        case GRADIENT_HORIZONTAL:
-            vColor = mix(g0.color, g1.color, f.x);
-            break;
-        case GRADIENT_VERTICAL:
-            vColor = mix(g0.color, g1.color, f.y);
-            break;
-        case GRADIENT_ROTATED:
-            vColor = vec4(1.0, 0.0, 1.0, 1.0);
-            break;
-    }
-
-    ClipData clip = fetch_clip(prim.clip_index);
-    write_clip(clip);
-}
diff --git a/webrender/res/ps_image.fs.glsl b/webrender/res/ps_image.fs.glsl
index f425fb3398..6d19273220 100644
--- a/webrender/res/ps_image.fs.glsl
+++ b/webrender/res/ps_image.fs.glsl
@@ -14,10 +14,12 @@ void main(void) {
     vec2 relative_pos_in_rect =
          clamp(pos, vLocalRect.xy, vLocalRect.xy + vLocalRect.zw) - vLocalRect.xy;
 #else
-    float alpha = 1.0;;
+    float alpha = 1.0;
     vec2 relative_pos_in_rect = vLocalPos;
 #endif
 
+    alpha = min(alpha, do_clip());
+
     // We calculate the particular tile this fragment belongs to, taking into
     // account the spacing in between tiles. We only paint if our fragment does
     // not fall into that spacing.
@@ -25,5 +27,5 @@ void main(void) {
     vec2 st = vTextureOffset + ((position_in_tile / vStretchSize) * vTextureSize);
     alpha = alpha * float(all(bvec2(step(position_in_tile, vStretchSize))));
 
-    oFragColor = vec4(1, 1, 1, alpha) * texture(sColor0, st);
+    oFragColor = vec4(1.0, 1.0, 1.0, alpha) * texture(sColor0, st);
 }
diff --git a/webrender/res/ps_image.vs.glsl b/webrender/res/ps_image.vs.glsl
index a3eb7808e5..2bcd0b4953 100644
--- a/webrender/res/ps_image.vs.glsl
+++ b/webrender/res/ps_image.vs.glsl
@@ -22,6 +22,8 @@ void main(void) {
     vLocalPos = vi.local_clamped_pos - vi.local_rect.p0;
 #endif
 
+    write_clip(vi.global_clamped_pos, prim.clip_area);
+
     // vUv will contain how many times this image has wrapped around the image size.
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = image.st_rect.xy / texture_size;
diff --git a/webrender/res/ps_image_clip.fs.glsl b/webrender/res/ps_image_clip.fs.glsl
deleted file mode 100644
index 5d48c94b83..0000000000
--- a/webrender/res/ps_image_clip.fs.glsl
+++ /dev/null
@@ -1,32 +0,0 @@
-#line 1
-
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-#ifdef WR_FEATURE_TRANSFORM
-    float alpha = 1.f;
-    vec2 local_pos = init_transform_fs(vLocalPos, vLocalRect, alpha);
-
-    // We clamp the texture coordinate calculation here to the local rectangle boundaries,
-    // which makes the edge of the texture stretch instead of repeat.
-    vec2 relative_pos_in_rect =
-         clamp(local_pos, vLocalRect.xy, vLocalRect.xy + vLocalRect.zw) - vLocalRect.xy;
-#else
-    float alpha = 1.f;
-    vec2 local_pos = vLocalPos;
-    vec2 relative_pos_in_rect = vLocalPos - vLocalRect.xy;
-#endif
-
-    alpha = min(alpha, do_clip(local_pos));
-
-    // We calculate the particular tile this fragment belongs to, taking into
-    // account the spacing in between tiles. We only paint if our fragment does
-    // not fall into that spacing.
-    vec2 position_in_tile = mod(relative_pos_in_rect, vStretchSize + vTileSpacing);
-    vec2 st = vTextureOffset + ((position_in_tile / vStretchSize) * vTextureSize);
-    alpha = alpha * float(all(bvec2(step(position_in_tile, vStretchSize))));
-
-    oFragColor = texture(sColor0, st) * vec4(1, 1, 1, alpha);
-}
diff --git a/webrender/res/ps_image_clip.glsl b/webrender/res/ps_image_clip.glsl
deleted file mode 100644
index 20c0343182..0000000000
--- a/webrender/res/ps_image_clip.glsl
+++ /dev/null
@@ -1,17 +0,0 @@
-#line 1
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-flat varying vec2 vTextureOffset; // Offset of this image into the texture atlas.
-flat varying vec2 vTextureSize;   // Size of the image in the texture atlas.
-flat varying vec2 vTileSpacing;   // Amount of space between tiled instances of this image.
-flat varying vec2 vStretchSize;
-flat varying vec4 vLocalRect;
-
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
-#else
-varying vec2 vLocalPos;
-varying vec2 vUv;                 // Location within the CSS box to draw.
-#endif
diff --git a/webrender/res/ps_image_clip.vs.glsl b/webrender/res/ps_image_clip.vs.glsl
deleted file mode 100644
index bfbf54bc85..0000000000
--- a/webrender/res/ps_image_clip.vs.glsl
+++ /dev/null
@@ -1,38 +0,0 @@
-#line 1
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    Primitive prim = load_primitive(gl_InstanceID);
-    Image image = fetch_image(prim.prim_index);
-
-#ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
-                                                    prim.local_clip_rect,
-                                                    prim.layer,
-                                                    prim.tile);
-    vLocalRect = vi.clipped_local_rect;
-    vLocalPos = vi.local_pos;
-#else
-    VertexInfo vi = write_vertex(prim.local_rect,
-                                 prim.local_clip_rect,
-                                 prim.layer,
-                                 prim.tile);
-    vLocalRect = prim.local_rect;
-    vLocalPos = vi.local_clamped_pos;
-#endif
-
-    ClipData clip = fetch_clip(prim.clip_index);
-    write_clip(clip);
-
-    // vUv will contain how many times this image has wrapped around the image size.
-    vec2 texture_size = vec2(textureSize(sColor0, 0));
-    vec2 st0 = image.st_rect.xy / texture_size;
-    vec2 st1 = image.st_rect.zw / texture_size;
-
-    vTextureSize = st1 - st0;
-    vTextureOffset = st0;
-    vTileSpacing = image.stretch_size_and_tile_spacing.zw;
-    vStretchSize = image.stretch_size_and_tile_spacing.xy;
-}
diff --git a/webrender/res/ps_rectangle.fs.glsl b/webrender/res/ps_rectangle.fs.glsl
index d65fe2581d..c0defc2999 100644
--- a/webrender/res/ps_rectangle.fs.glsl
+++ b/webrender/res/ps_rectangle.fs.glsl
@@ -3,11 +3,12 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
+    float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
-    float alpha = 0.0;
+    alpha = 0.0;
     init_transform_fs(vLocalPos, vLocalRect, alpha);
-    oFragColor = vec4(1, 1, 1, alpha) * vColor;
-#else
-    oFragColor = vColor;
 #endif
+
+    alpha = min(alpha, do_clip());
+    oFragColor = vColor * vec4(1.0, 1.0, 1.0, alpha);
 }
diff --git a/webrender/res/ps_rectangle.vs.glsl b/webrender/res/ps_rectangle.vs.glsl
index 0c886b6fc4..fc98c2aa21 100644
--- a/webrender/res/ps_rectangle.vs.glsl
+++ b/webrender/res/ps_rectangle.vs.glsl
@@ -15,9 +15,11 @@ void main(void) {
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
-    write_vertex(prim.local_rect,
-                 prim.local_clip_rect,
-                 prim.layer,
-                 prim.tile);
+    VertexInfo vi = write_vertex(prim.local_rect,
+                                 prim.local_clip_rect,
+                                 prim.layer,
+                                 prim.tile);
 #endif
+
+    write_clip(vi.global_clamped_pos, prim.clip_area);
 }
diff --git a/webrender/res/ps_rectangle_clip.fs.glsl b/webrender/res/ps_rectangle_clip.fs.glsl
deleted file mode 100644
index cca1841d4b..0000000000
--- a/webrender/res/ps_rectangle_clip.fs.glsl
+++ /dev/null
@@ -1,15 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    float alpha = 1.f;
-#ifdef WR_FEATURE_TRANSFORM
-    vec2 local_pos = init_transform_fs(vPos, vLocalRect, alpha);
-#else
-    vec2 local_pos = vPos;
-#endif
-
-    alpha = min(alpha, do_clip(local_pos));
-    oFragColor = vColor * vec4(1, 1, 1, alpha);
-}
diff --git a/webrender/res/ps_rectangle_clip.vs.glsl b/webrender/res/ps_rectangle_clip.vs.glsl
deleted file mode 100644
index 1cc281ac61..0000000000
--- a/webrender/res/ps_rectangle_clip.vs.glsl
+++ /dev/null
@@ -1,27 +0,0 @@
-#line 1
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    Primitive prim = load_primitive(gl_InstanceID);
-    Rectangle rect = fetch_rectangle(prim.prim_index);
-    vColor = rect.color;
-#ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
-                                                    prim.local_clip_rect,
-                                                    prim.layer,
-                                                    prim.tile);
-    vLocalRect = vi.clipped_local_rect;
-    vPos = vi.local_pos;
-#else
-    VertexInfo vi = write_vertex(prim.local_rect,
-                                 prim.local_clip_rect,
-                                 prim.layer,
-                                 prim.tile);
-    vPos = vi.local_clamped_pos;
-#endif
-
-    ClipData clip = fetch_clip(prim.clip_index);
-    write_clip(clip);
-}
diff --git a/webrender/res/ps_text_run.fs.glsl b/webrender/res/ps_text_run.fs.glsl
index a3dcacced4..e7049741bf 100644
--- a/webrender/res/ps_text_run.fs.glsl
+++ b/webrender/res/ps_text_run.fs.glsl
@@ -4,14 +4,16 @@
 
 void main(void) {
 #ifdef WR_FEATURE_SUBPIXEL_AA
+    // NOTE: the blend mode is not compatible with clipping, so do_clip() is not applied here.
     oFragColor = texture(sColor0, vUv);
 #else
-    float a = texture(sColor0, vUv).a;
+    float alpha = texture(sColor0, vUv).a;
 #ifdef WR_FEATURE_TRANSFORM
-    float alpha = 0.0;
-    init_transform_fs(vLocalPos, vLocalRect, alpha);
-    a *= alpha;
+    float a = 0.0;
+    init_transform_fs(vLocalPos, vLocalRect, a);
+    alpha *= a;
 #endif
-    oFragColor = vec4(vColor.rgb, vColor.a * a);
+    alpha = min(alpha, do_clip());
+    oFragColor = vec4(vColor.rgb, vColor.a * alpha);
 #endif
 }
diff --git a/webrender/res/ps_text_run.vs.glsl b/webrender/res/ps_text_run.vs.glsl
index 9adfbffc76..4102b50744 100644
--- a/webrender/res/ps_text_run.vs.glsl
+++ b/webrender/res/ps_text_run.vs.glsl
@@ -25,6 +25,8 @@ void main(void) {
     vec2 f = (vi.local_clamped_pos - vi.local_rect.p0) / (vi.local_rect.p1 - vi.local_rect.p0);
 #endif
 
+    write_clip(vi.global_clamped_pos, prim.clip_area);
+
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = glyph.uv_rect.xy / texture_size;
     vec2 st1 = glyph.uv_rect.zw / texture_size;
diff --git a/webrender/src/debug_render.rs b/webrender/src/debug_render.rs
index e4ca5d769a..6fd626f7fe 100644
--- a/webrender/src/debug_render.rs
+++ b/webrender/src/debug_render.rs
@@ -161,22 +161,19 @@ impl DebugRenderer {
     pub fn render(&mut self,
                   device: &mut Device,
                   viewport_size: &Size2D<u32>) {
-        if !self.font_indices.is_empty() ||
-           !self.line_vertices.is_empty() ||
-           !self.tri_vertices.is_empty() {
-
-            device.disable_depth();
-            device.set_blend(true);
-            device.set_blend_mode_alpha();
-
-            let projection = Matrix4D::ortho(0.0,
-                                             viewport_size.width as f32,
-                                             viewport_size.height as f32,
-                                             0.0,
-                                             ORTHO_NEAR_PLANE,
-                                             ORTHO_FAR_PLANE);
-
-            // Triangles
+        device.disable_depth();
+        device.set_blend(true);
+        device.set_blend_mode_alpha();
+
+        let projection = Matrix4D::ortho(0.0,
+                                         viewport_size.width as f32,
+                                         viewport_size.height as f32,
+                                         0.0,
+                                         ORTHO_NEAR_PLANE,
+                                         ORTHO_FAR_PLANE);
+
+        // Triangles
+        if !self.tri_vertices.is_empty() {
             device.bind_program(self.color_program_id, &projection);
             device.bind_vao(self.tri_vao);
             device.update_vao_indices(self.tri_vao,
@@ -186,15 +183,20 @@ impl DebugRenderer {
                                             &self.tri_vertices,
                                             VertexUsageHint::Dynamic);
             device.draw_triangles_u32(0, self.tri_indices.len() as i32);
+        }
 
-            // Lines
+        // Lines
+        if !self.line_vertices.is_empty() {
+            device.bind_program(self.color_program_id, &projection);
             device.bind_vao(self.line_vao);
             device.update_vao_main_vertices(self.line_vao,
                                             &self.line_vertices,
                                             VertexUsageHint::Dynamic);
             device.draw_nonindexed_lines(0, self.line_vertices.len() as i32);
+        }
 
-            // Glyphs
+        // Glyphs
+        if !self.font_indices.is_empty() {
             device.bind_program(self.font_program_id, &projection);
             device.bind_texture(TextureSampler::Color0, self.font_texture_id);
             device.bind_vao(self.font_vao);
diff --git a/webrender/src/device.rs b/webrender/src/device.rs
index 7df16057c2..c7a2e6d3cc 100644
--- a/webrender/src/device.rs
+++ b/webrender/src/device.rs
@@ -1724,6 +1724,12 @@ impl Device {
         gl::blend_color(color.r, color.g, color.b, color.a);
         gl::blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
     }
+
+    pub fn set_blend_mode_multiply(&self) {
+        gl::blend_func_separate(gl::ZERO, gl::SRC_COLOR,
+                                gl::ZERO, gl::SRC_ALPHA);
+        gl::blend_equation(gl::FUNC_ADD);
+    }
 }
 
 impl Drop for Device {
diff --git a/webrender/src/frame.rs b/webrender/src/frame.rs
index 4e087abf65..e31f6345d4 100644
--- a/webrender/src/frame.rs
+++ b/webrender/src/frame.rs
@@ -11,7 +11,7 @@ use internal_types::{CompositionOp};
 use internal_types::{LowLevelFilterOp};
 use internal_types::{RendererFrame};
 use layer::{Layer, ScrollingState};
-use resource_cache::{DummyResources, ResourceCache};
+use resource_cache::ResourceCache;
 use scene::Scene;
 use std::collections::{HashMap, HashSet};
 use std::hash::BuildHasherDefault;
@@ -397,7 +397,6 @@ impl Frame {
 
     pub fn create(&mut self,
                   scene: &Scene,
-                  dummy_resources: &DummyResources,
                   pipeline_sizes: &mut HashMap<PipelineId, Size2D<f32>>,
                   device_pixel_ratio: f32) {
         let root_pipeline_id = match scene.root_pipeline_id {
@@ -447,7 +446,6 @@ impl Frame {
 
         let mut frame_builder = FrameBuilder::new(root_pipeline.viewport_size,
                                                   device_pixel_ratio,
-                                                  dummy_resources.clone(),
                                                   self.debug,
                                                   self.frame_builder_config);
 
diff --git a/webrender/src/gpu_store.rs b/webrender/src/gpu_store.rs
index 42f176304c..6b2d2e1577 100644
--- a/webrender/src/gpu_store.rs
+++ b/webrender/src/gpu_store.rs
@@ -5,7 +5,7 @@
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use std::mem;
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
 pub struct GpuStoreAddress(pub i32);
 
 /// A CPU-side buffer storing content to be uploaded to the GPU.
diff --git a/webrender/src/internal_types.rs b/webrender/src/internal_types.rs
index 936e85f6ce..59cb756c11 100644
--- a/webrender/src/internal_types.rs
+++ b/webrender/src/internal_types.rs
@@ -223,14 +223,12 @@ impl TextureSampler {
 #[derive(Copy, Clone, Debug)]
 pub struct BatchTextures {
     pub colors: [SourceTexture; 3],
-    pub mask: SourceTexture,
 }
 
 impl BatchTextures {
     pub fn no_texture() -> Self {
         BatchTextures {
             colors: [SourceTexture::Invalid; 3],
-            mask: SourceTexture::Invalid,
         }
     }
 }
@@ -426,7 +424,7 @@ pub enum AxisDirection {
     Vertical,
 }
 
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
 pub struct StackingContextIndex(pub usize);
 
 #[derive(Clone, Copy, Debug)]
diff --git a/webrender/src/lib.rs b/webrender/src/lib.rs
index 3b6c6d1f6c..202c8305f0 100644
--- a/webrender/src/lib.rs
+++ b/webrender/src/lib.rs
@@ -60,6 +60,7 @@ mod geometry;
 mod gpu_store;
 mod internal_types;
 mod layer;
+mod mask_cache;
 mod prim_store;
 mod profiler;
 mod record;
diff --git a/webrender/src/mask_cache.rs b/webrender/src/mask_cache.rs
new file mode 100644
index 0000000000..49276f2406
--- /dev/null
+++ b/webrender/src/mask_cache.rs
@@ -0,0 +1,153 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use euclid::{Rect, Matrix4D};
+use gpu_store::{GpuStore, GpuStoreAddress};
+use internal_types::DeviceRect;
+use prim_store::{ClipData, GpuBlock32, PrimitiveClipSource, PrimitiveStore};
+use prim_store::{CLIP_DATA_GPU_SIZE, MASK_DATA_GPU_SIZE};
+use tiling::StackingContextIndex;
+use util::TransformedRect;
+use webrender_traits::{AuxiliaryLists, ImageMask};
+
+/// A run of `item_count` clip items in the GPU store, beginning at `start`.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct ClipAddressRange {
+    pub start: GpuStoreAddress,
+    pub item_count: u32,
+}
+
+/// Key describing a mask: the layer it belongs to, the range of clip items
+/// and the GPU store address of the image mask data, if any.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub struct MaskCacheKey {
+    pub layer_id: StackingContextIndex,
+    pub clip_range: ClipAddressRange,
+    pub image: Option<GpuStoreAddress>,
+}
+
+impl MaskCacheKey {
+    /// Create a key for `layer_id` with no clip items and no image mask.
+    pub fn empty(layer_id: StackingContextIndex) -> MaskCacheKey {
+        MaskCacheKey {
+            layer_id: layer_id,
+            clip_range: ClipAddressRange {
+                start: GpuStoreAddress(0),
+                item_count: 0,
+            },
+            image: None,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct MaskCacheInfo {
+    pub key: MaskCacheKey,
+    // This is needed to update the ImageMaskData after the
+    // ResourceCache allocates/loads the actual data.
+    // Will be simplified after the TextureCache upgrade.
+    pub image: Option<ImageMask>,
+    pub device_rect: DeviceRect,
+    pub local_rect: Option<Rect<f32>>,
+}
+
+impl MaskCacheInfo {
+    /// Create a new mask cache info. It allocates the GPU store data but leaves
+    /// it uninitialized for the following `update()` call to deal with.
+    ///
+    /// Returns `None` when `source` has neither complex clips nor an image mask.
+    pub fn new(source: &PrimitiveClipSource,
+               layer_id: StackingContextIndex,
+               clip_store: &mut GpuStore<GpuBlock32>)
+               -> Option<MaskCacheInfo> {
+        let mut clip_key = MaskCacheKey::empty(layer_id);
+
+        let image = match source {
+            &PrimitiveClipSource::NoClip => None,
+            &PrimitiveClipSource::Complex(..) => {
+                clip_key.clip_range.item_count = 1;
+                clip_key.clip_range.start = clip_store.alloc(CLIP_DATA_GPU_SIZE);
+                None
+            }
+            &PrimitiveClipSource::Region(ref region) => {
+                let num = region.complex.length;
+                if num != 0 {
+                    clip_key.clip_range.item_count = num as u32;
+                    clip_key.clip_range.start = clip_store.alloc(CLIP_DATA_GPU_SIZE * num);
+                }
+                if region.image_mask.is_some() {
+                    let address = clip_store.alloc(MASK_DATA_GPU_SIZE);
+                    clip_key.image = Some(address);
+                }
+                region.image_mask
+            }
+        };
+
+        if clip_key.clip_range.item_count != 0 || clip_key.image.is_some() {
+            Some(MaskCacheInfo {
+                key: clip_key,
+                image: image,
+                local_rect: None,
+                device_rect: DeviceRect::zero(),
+            })
+        } else {
+            None
+        }
+    }
+
+    /// Refresh the cached rectangles for this mask.
+    ///
+    /// On the first call (while `local_rect` is still `None`) this writes the
+    /// clip data into `clip_store` and computes the local rectangle as the
+    /// intersection of `clip_rect` with every clip (and the image mask rect,
+    /// unless the mask repeats). Every call recomputes `device_rect` from the
+    /// cached local rectangle and `transform`.
+    pub fn update(&mut self,
+                  source: &PrimitiveClipSource,
+                  transform: &Matrix4D<f32>,
+                  clip_rect: &Rect<f32>,
+                  clip_store: &mut GpuStore<GpuBlock32>,
+                  device_pixel_ratio: f32,
+                  aux_lists: &AuxiliaryLists) {
+
+        if self.local_rect.is_none() {
+            let mut local_rect = Some(*clip_rect);
+            match source {
+                &PrimitiveClipSource::NoClip => (),
+                &PrimitiveClipSource::Complex(rect, radius) => {
+                    let slice = clip_store.get_slice_mut(self.key.clip_range.start, CLIP_DATA_GPU_SIZE);
+                    let data = ClipData::uniform(rect, radius);
+                    PrimitiveStore::populate_clip_data(slice, data);
+                    debug_assert_eq!(self.key.clip_range.item_count, 1);
+                    local_rect = local_rect.and_then(|r| r.intersection(&rect));
+                }
+                &PrimitiveClipSource::Region(ref region) => {
+                    let clips = aux_lists.complex_clip_regions(&region.complex);
+                    assert_eq!(self.key.clip_range.item_count, clips.len() as u32);
+                    if !clips.is_empty() {
+                        let slice = clip_store.get_slice_mut(self.key.clip_range.start, CLIP_DATA_GPU_SIZE * clips.len());
+                        for (clip, chunk) in clips.iter().zip(slice.chunks_mut(CLIP_DATA_GPU_SIZE)) {
+                            let data = ClipData::from_clip_region(clip);
+                            PrimitiveStore::populate_clip_data(chunk, data);
+                            local_rect = local_rect.and_then(|r| r.intersection(&clip.rect));
+                        }
+                    }
+                    match region.image_mask {
+                        Some(ref mask) if !mask.repeat => {
+                            local_rect = local_rect.and_then(|r| r.intersection(&mask.rect));
+                        },
+                        _ => ()
+                    }
+                }
+            };
+            // If the intersection is empty (`None`), fall back to a zero rect.
+            self.local_rect = Some(local_rect.unwrap_or(Rect::zero()));
+        }
+
+        let transformed = TransformedRect::new(self.local_rect.as_ref().unwrap(),
+                                               transform,
+                                               device_pixel_ratio);
+        self.device_rect = transformed.bounding_rect;
+    }
+}
diff --git a/webrender/src/prim_store.rs b/webrender/src/prim_store.rs
index 70a706f895..c4aa89285f 100644
--- a/webrender/src/prim_store.rs
+++ b/webrender/src/prim_store.rs
@@ -6,15 +6,18 @@ use app_units::Au;
 use euclid::{Point2D, Matrix4D, Rect, Size2D};
 use gpu_store::{GpuStore, GpuStoreAddress};
 use internal_types::{device_pixel, DeviceRect, DeviceSize, SourceTexture};
+use mask_cache::{MaskCacheInfo, MaskCacheKey};
 use resource_cache::ResourceCache;
 use std::mem;
 use std::usize;
-use texture_cache::TextureCacheItem;
 use tiling::RenderTask;
 use util::TransformedRect;
 use webrender_traits::{AuxiliaryLists, ColorF, ImageKey, ImageRendering};
-use webrender_traits::{FontRenderMode, WebGLContextId};
-use webrender_traits::{ClipRegion, FontKey, ItemRange, ComplexClipRegion, GlyphKey};
+use webrender_traits::{ClipRegion, ComplexClipRegion, ItemRange, GlyphKey};
+use webrender_traits::{FontKey, FontRenderMode, WebGLContextId};
+
+pub const CLIP_DATA_GPU_SIZE: usize = 5;
+pub const MASK_DATA_GPU_SIZE: usize = 1;
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct SpecificPrimitiveIndex(pub usize);
@@ -62,9 +65,8 @@ pub enum PrimitiveClipSource {
 #[derive(Debug)]
 pub struct PrimitiveMetadata {
     pub is_opaque: bool,
-    pub mask_texture_id: SourceTexture,
-    pub clip_index: Option<GpuStoreAddress>,
     pub clip_source: Box<PrimitiveClipSource>,
+    pub clip_cache_info: Option<MaskCacheInfo>,
     pub prim_kind: PrimitiveKind,
     pub cpu_prim_index: SpecificPrimitiveIndex,
     pub gpu_prim_index: GpuStoreAddress,
@@ -229,7 +231,7 @@ impl ClipCorner {
 }
 
 #[derive(Debug, Clone)]
-struct ImageMaskData {
+pub struct ImageMaskData {
     uv_rect: Rect<f32>,
     local_rect: Rect<f32>,
 }
@@ -241,7 +243,6 @@ pub struct ClipData {
     top_right: ClipCorner,
     bottom_left: ClipCorner,
     bottom_right: ClipCorner,
-    mask_data: ImageMaskData,
 }
 
 impl ClipData {
@@ -286,10 +287,6 @@ impl ClipData {
                 inner_radius_x: 0.0,
                 inner_radius_y: 0.0,
             },
-            mask_data: ImageMaskData {
-                uv_rect: Rect::zero(),
-                local_rect: Rect::zero(),
-            },
         }
     }
 
@@ -319,10 +316,6 @@ impl ClipData {
                                                         Size2D::new(radius, radius)),
                                               radius,
                                               0.0),
-            mask_data: ImageMaskData {
-                uv_rect: Rect::zero(),
-                local_rect: Rect::zero(),
-            },
         }
     }
 }
@@ -377,33 +370,22 @@ impl PrimitiveStore {
         }
     }
 
-    fn populate_clip_data(data: &mut [GpuBlock32], clip: ClipData) {
+    pub fn populate_clip_data(data: &mut [GpuBlock32], clip: ClipData) {
         data[0] = GpuBlock32::from(clip.rect);
         data[1] = GpuBlock32::from(clip.top_left);
         data[2] = GpuBlock32::from(clip.top_right);
         data[3] = GpuBlock32::from(clip.bottom_left);
         data[4] = GpuBlock32::from(clip.bottom_right);
-        data[5] = GpuBlock32::from(clip.mask_data);
     }
 
     pub fn add_primitive(&mut self,
-                         rect: &Rect<f32>,
-                         clip: &ClipRegion,
+                         geometry: PrimitiveGeometry,
+                         clip_source: Box<PrimitiveClipSource>,
+                         clip_info: Option<MaskCacheInfo>,
                          container: PrimitiveContainer) -> PrimitiveIndex {
         let prim_index = self.cpu_metadata.len();
-
         self.cpu_bounding_rects.push(None);
-
-        self.gpu_geometry.push(PrimitiveGeometry {
-            local_rect: *rect,
-            local_clip_rect: clip.main.clone(),
-        });
-
-        let clip_source = Box::new(if clip.is_complex() {
-            PrimitiveClipSource::Region(clip.clone())
-        } else {
-            PrimitiveClipSource::NoClip
-        });
+        self.gpu_geometry.push(geometry);
 
         let metadata = match container {
             PrimitiveContainer::Rectangle(rect) => {
@@ -412,9 +394,8 @@ impl PrimitiveStore {
 
                 let metadata = PrimitiveMetadata {
                     is_opaque: is_opaque,
-                    mask_texture_id: SourceTexture::Invalid,
-                    clip_index: None,
                     clip_source: clip_source,
+                    clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Rectangle,
                     cpu_prim_index: SpecificPrimitiveIndex::invalid(),
                     gpu_prim_index: gpu_address,
@@ -431,9 +412,8 @@ impl PrimitiveStore {
 
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
-                    mask_texture_id: SourceTexture::Invalid,
-                    clip_index: None,
                     clip_source: clip_source,
+                    clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::TextRun,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_runs.len()),
                     gpu_prim_index: gpu_address,
@@ -450,9 +430,8 @@ impl PrimitiveStore {
 
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
-                    mask_texture_id: SourceTexture::Invalid,
-                    clip_index: None,
                     clip_source: clip_source,
+                    clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Image,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_images.len()),
                     gpu_prim_index: gpu_address,
@@ -469,9 +448,8 @@ impl PrimitiveStore {
 
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
-                    mask_texture_id: SourceTexture::Invalid,
-                    clip_index: None,
                     clip_source: clip_source,
+                    clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Border,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_borders.len()),
                     gpu_prim_index: gpu_address,
@@ -489,9 +467,8 @@ impl PrimitiveStore {
 
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
-                    mask_texture_id: SourceTexture::Invalid,
-                    clip_index: None,
                     clip_source: clip_source,
+                    clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Gradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
                     gpu_prim_index: gpu_address,
@@ -538,9 +515,8 @@ impl PrimitiveStore {
 
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
-                    mask_texture_id: SourceTexture::Invalid,
-                    clip_index: None,
                     clip_source: clip_source,
+                    clip_cache_info: None,
                     prim_kind: PrimitiveKind::BoxShadow,
                     cpu_prim_index: SpecificPrimitiveIndex::invalid(),
                     gpu_prim_index: gpu_prim_address,
@@ -568,18 +544,15 @@ impl PrimitiveStore {
         for prim_index in self.prims_to_resolve.drain(..) {
             let metadata = &mut self.cpu_metadata[prim_index.0];
 
-            if let &PrimitiveClipSource::Region(ClipRegion { image_mask: Some(mask), .. }) = metadata.clip_source.as_ref() {
-                let tex_cache = resource_cache.get_image(mask.image, ImageRendering::Auto);
-                metadata.mask_texture_id = tex_cache.texture_id;
-                if let Some(address) = metadata.clip_index {
-                    let clip_data = self.gpu_data32.get_slice_mut(address, 6);
-                    clip_data[5] = GpuBlock32::from(ImageMaskData {
-                        uv_rect: Rect::new(tex_cache.uv0,
-                                           Size2D::new(tex_cache.uv1.x - tex_cache.uv0.x,
-                                                       tex_cache.uv1.y - tex_cache.uv0.y)),
-                        local_rect: mask.rect,
-                    });
-                }
+            if let Some(MaskCacheInfo{ key: MaskCacheKey { image: Some(gpu_address), .. }, image: Some(ref mask), .. }) = metadata.clip_cache_info {
+                let cache_item = resource_cache.get_image(mask.image, ImageRendering::Auto);
+                let mask_data = self.gpu_data32.get_slice_mut(gpu_address, MASK_DATA_GPU_SIZE);
+                mask_data[0] = GpuBlock32::from(ImageMaskData {
+                    uv_rect: Rect::new(cache_item.uv0,
+                                       Size2D::new(cache_item.uv1.x - cache_item.uv0.x,
+                                                   cache_item.uv1.y - cache_item.uv0.y)),
+                    local_rect: mask.rect,
+                });
             }
 
             match metadata.prim_kind {
@@ -644,8 +617,8 @@ impl PrimitiveStore {
         if let Some(rect) = rect {
             self.gpu_geometry.get_mut(GpuStoreAddress(index.0 as i32))
                 .local_clip_rect = rect;
-            if is_complex && metadata.clip_index.is_none() {
-                metadata.clip_index = Some(self.gpu_data32.alloc(6))
+            if is_complex {
+                metadata.clip_cache_info = None; //CLIP TODO: re-use the existing GPU allocation
             }
         }
         *metadata.clip_source.as_mut() = source;
@@ -685,50 +658,25 @@ impl PrimitiveStore {
     pub fn prepare_prim_for_render(&mut self,
                                    prim_index: PrimitiveIndex,
                                    resource_cache: &mut ResourceCache,
+                                   layer_transform: &Matrix4D<f32>,
+                                   layer_combined_local_clip_rect: &Rect<f32>,
                                    device_pixel_ratio: f32,
-                                   dummy_mask_cache_item: &TextureCacheItem,
                                    auxiliary_lists: &AuxiliaryLists) -> bool {
+
         let metadata = &mut self.cpu_metadata[prim_index.0];
         let mut prim_needs_resolve = false;
         let mut rebuild_bounding_rect = false;
 
-        if metadata.clip_index.is_none() {
-            // if the `clip_index` already exist, we consider the contents up to date
-            let clip_data = match metadata.clip_source.as_ref() {
-                &PrimitiveClipSource::NoClip => None,
-                &PrimitiveClipSource::Complex(rect, radius) => {
-                    Some(ClipData::uniform(rect, radius))
-                }
-                &PrimitiveClipSource::Region(ref clip_region) => {
-                    if let Some(mask) = clip_region.image_mask {
-                        resource_cache.request_image(mask.image, ImageRendering::Auto);
-                    }
-                    let clips = auxiliary_lists.complex_clip_regions(&clip_region.complex);
-                    //TODO: proper solution to multiple complex clips
-                    match clips.len() {
-                        0 if clip_region.image_mask.is_none() => None,
-                        0 => Some(ClipData::uniform(clip_region.main, 0.0)),
-                        1 => Some(ClipData::from_clip_region(&clips[0])),
-                        _ => {
-                            let internal_clip = clips.last().unwrap();
-                            let region = if clips.iter().all(|current_clip| current_clip.might_contain(internal_clip)) {
-                                internal_clip
-                            } else {
-                                &clips[0]
-                            };
-                            Some(ClipData::from_clip_region(region))
-                        },
-                    }
-                }
-            };
-
-            if let Some(data) = clip_data {
+        if let Some(ref mut clip_info) = metadata.clip_cache_info {
+            clip_info.update(&metadata.clip_source,
+                             layer_transform,
+                             layer_combined_local_clip_rect,
+                             &mut self.gpu_data32,
+                             device_pixel_ratio,
+                             auxiliary_lists);
+            if let &PrimitiveClipSource::Region(ClipRegion{ image_mask: Some(ref mask), .. }) = metadata.clip_source.as_ref() {
+                resource_cache.request_image(mask.image, ImageRendering::Auto);
                 prim_needs_resolve = true;
-                let gpu_address = self.gpu_data32.alloc(6);
-                let gpu_data = self.gpu_data32.get_slice_mut(gpu_address, 6);
-                Self::populate_clip_data(gpu_data, data);
-                metadata.clip_index = Some(gpu_address);
-                metadata.mask_texture_id = SourceTexture::TextureCache(dummy_mask_cache_item.texture_id);
             }
         }
 
diff --git a/webrender/src/render_backend.rs b/webrender/src/render_backend.rs
index 04e4bf731b..73a15642c8 100644
--- a/webrender/src/render_backend.rs
+++ b/webrender/src/render_backend.rs
@@ -8,7 +8,7 @@ use internal_types::{FontTemplate, GLContextHandleWrapper, GLContextWrapper};
 use internal_types::{SourceTexture, ResultMsg, RendererFrame};
 use ipc_channel::ipc::{IpcBytesReceiver, IpcBytesSender, IpcReceiver};
 use profiler::BackendProfileCounters;
-use resource_cache::{DummyResources, ResourceCache};
+use resource_cache::ResourceCache;
 use scene::Scene;
 use std::collections::HashMap;
 use std::fs;
@@ -36,7 +36,6 @@ pub struct RenderBackend {
     next_namespace_id: IdNamespace,
 
     resource_cache: ResourceCache,
-    dummy_resources: DummyResources,
 
     scene: Scene,
     frame: Frame,
@@ -58,7 +57,6 @@ impl RenderBackend {
                result_tx: Sender<ResultMsg>,
                device_pixel_ratio: f32,
                texture_cache: TextureCache,
-               dummy_resources: DummyResources,
                enable_aa: bool,
                notifier: Arc<Mutex<Option<Box<RenderNotifier>>>>,
                webrender_context_handle: Option<GLContextHandleWrapper>,
@@ -78,7 +76,6 @@ impl RenderBackend {
             result_tx: result_tx,
             device_pixel_ratio: device_pixel_ratio,
             resource_cache: resource_cache,
-            dummy_resources: dummy_resources,
             scene: Scene::new(),
             frame: Frame::new(debug, config),
             next_namespace_id: IdNamespace(1),
@@ -342,7 +339,6 @@ impl RenderBackend {
         }
 
         self.frame.create(&self.scene,
-                          &self.dummy_resources,
                           &mut new_pipeline_sizes,
                           self.device_pixel_ratio);
 
diff --git a/webrender/src/renderer.rs b/webrender/src/renderer.rs
index 561791a1b8..785c869ac8 100644
--- a/webrender/src/renderer.rs
+++ b/webrender/src/renderer.rs
@@ -23,7 +23,6 @@ use ipc_channel::ipc;
 use profiler::{Profiler, BackendProfileCounters};
 use profiler::{GpuProfileTag, RendererProfileTimers, RendererProfileCounters};
 use render_backend::RenderBackend;
-use resource_cache::DummyResources;
 use std::cmp;
 use std::collections::HashMap;
 use std::f32;
@@ -46,19 +45,17 @@ pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 const UBO_BIND_DATA: u32 = 1;
 
 const GPU_TAG_CACHE_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "C_BoxShadow", color: debug_colors::BLACK };
+const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag { label: "C_Clip", color: debug_colors::PURPLE };
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "C_TextRun", color: debug_colors::MISTYROSE };
 const GPU_TAG_INIT: GpuProfileTag = GpuProfileTag { label: "Init", color: debug_colors::WHITE };
 const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag { label: "Target", color: debug_colors::SLATEGREY };
 const GPU_TAG_CLEAR_TILES: GpuProfileTag = GpuProfileTag { label: "Clear Tiles", color: debug_colors::BROWN };
 const GPU_TAG_PRIM_RECT: GpuProfileTag = GpuProfileTag { label: "Rect", color: debug_colors::RED };
-const GPU_TAG_PRIM_RECT_CLIP: GpuProfileTag = GpuProfileTag { label: "RectClip", color: debug_colors::DARKRED };
 const GPU_TAG_PRIM_IMAGE: GpuProfileTag = GpuProfileTag { label: "Image", color: debug_colors::GREEN };
-const GPU_TAG_PRIM_IMAGE_CLIP: GpuProfileTag = GpuProfileTag { label: "ImageClip", color: debug_colors::DARKGREEN };
 const GPU_TAG_PRIM_BLEND: GpuProfileTag = GpuProfileTag { label: "Blend", color: debug_colors::LIGHTBLUE };
 const GPU_TAG_PRIM_COMPOSITE: GpuProfileTag = GpuProfileTag { label: "Composite", color: debug_colors::MAGENTA };
 const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "TextRun", color: debug_colors::BLUE };
 const GPU_TAG_PRIM_GRADIENT: GpuProfileTag = GpuProfileTag { label: "Gradient", color: debug_colors::YELLOW };
-const GPU_TAG_PRIM_GRADIENT_CLIP: GpuProfileTag = GpuProfileTag { label: "GradientClip", color: debug_colors::YELLOWGREEN };
 const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag { label: "AngleGradient", color: debug_colors::POWDERBLUE };
 const GPU_TAG_PRIM_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "BoxShadow", color: debug_colors::CYAN };
 const GPU_TAG_PRIM_BORDER: GpuProfileTag = GpuProfileTag { label: "Border", color: debug_colors::ORANGE };
@@ -126,6 +123,7 @@ enum ShaderKind {
     Primitive,
     Clear,
     Cache,
+    ClipCache,
 }
 
 struct LazilyCompiledShader {
@@ -172,6 +170,11 @@ impl LazilyCompiledShader {
                                        self.max_ubo_vectors,
                                        &self.features)
                 }
+                ShaderKind::ClipCache => {
+                    create_clip_shader(self.name,
+                                       device,
+                                       self.max_ubo_vectors)
+                }
             };
             self.id = Some(id);
         }
@@ -266,13 +269,7 @@ fn create_prim_shader(name: &'static str,
         prefix.push_str(&format!("#define WR_FEATURE_{}\n", feature));
     }
 
-    let includes_base = ["prim_shared"];
-    let includes_clip = ["prim_shared", "clip_shared"];
-    let includes: &[&str] = if name.ends_with("_clip") {
-        &includes_clip
-    } else {
-        &includes_base
-    };
+    let includes = &["prim_shared"];
     let program_id = device.create_program_with_prefix(name,
                                                        includes,
                                                        Some(prefix));
@@ -283,6 +280,26 @@ fn create_prim_shader(name: &'static str,
     program_id
 }
 
+/// Builds a clip-cache (`cs_clip_*`) program: `prim_shared` + `clip_shared` includes,
+/// `WR_FEATURE_TRANSFORM` always defined, and the "Data" UBO bound to `UBO_BIND_DATA`.
+fn create_clip_shader(name: &'static str,
+                      device: &mut Device,
+                      max_ubo_vectors: usize) -> ProgramId {
+    // `\` continuations keep source indentation out of the prefix; each define ends in `\n`.
+    let prefix = format!("#define WR_MAX_UBO_VECTORS {}\n\
+                          #define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n\
+                          #define WR_FEATURE_TRANSFORM\n",
+                          max_ubo_vectors,
+                          MAX_VERTEX_TEXTURE_WIDTH);
+    let includes = &["prim_shared", "clip_shared"];
+    let program_id = device.create_program_with_prefix(name,
+                                                       includes,
+                                                       Some(prefix));
+    let data_index = device.assign_ubo_binding(program_id, "Data", UBO_BIND_DATA);
+    debug!("ClipShader {}: data={} max={}", name, data_index, max_ubo_vectors);
+    program_id
+}
+
 fn create_clear_shader(name: &'static str,
                        device: &mut Device,
                        max_ubo_vectors: usize) -> ProgramId {
@@ -314,6 +331,9 @@ pub struct Renderer {
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_box_shadow: LazilyCompiledShader,
+    cs_clip_clear: LazilyCompiledShader,
+    cs_clip_rectangle: LazilyCompiledShader,
+    cs_clip_image: LazilyCompiledShader,
     cs_text_run: LazilyCompiledShader,
     cs_blur: LazilyCompiledShader,
 
@@ -330,11 +350,8 @@ pub struct Renderer {
     ps_image: PrimitiveShader,
     ps_border: PrimitiveShader,
     ps_gradient: PrimitiveShader,
-    ps_gradient_clip: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_box_shadow: PrimitiveShader,
-    ps_rectangle_clip: PrimitiveShader,
-    ps_image_clip: PrimitiveShader,
     ps_cache_image: PrimitiveShader,
 
     ps_blend: LazilyCompiledShader,
@@ -425,6 +442,7 @@ impl Renderer {
 
         let max_prim_instances = get_ubo_max_len::<tiling::PrimitiveInstance>(max_ubo_size);
         let max_cache_instances = get_ubo_max_len::<tiling::CachePrimitiveInstance>(max_ubo_size);
+        let max_clip_instances = get_ubo_max_len::<tiling::CacheClipInstance>(max_ubo_size);
         let max_prim_blends = get_ubo_max_len::<tiling::PackedBlendPrimitive>(max_ubo_size);
         let max_prim_composites = get_ubo_max_len::<tiling::PackedCompositePrimitive>(max_ubo_size);
         let max_blurs = get_ubo_max_len::<tiling::BlurCommand>(max_ubo_size);
@@ -435,6 +453,24 @@ impl Renderer {
                                                       &[],
                                                       &mut device,
                                                       options.precache_shaders);
+        let cs_clip_clear = LazilyCompiledShader::new(ShaderKind::ClipCache,
+                                                      "cs_clip_clear",
+                                                      max_clip_instances,
+                                                      &[],
+                                                      &mut device,
+                                                      options.precache_shaders);
+        let cs_clip_rectangle = LazilyCompiledShader::new(ShaderKind::ClipCache,
+                                                          "cs_clip_rectangle",
+                                                          max_clip_instances,
+                                                          &[],
+                                                          &mut device,
+                                                          options.precache_shaders);
+        let cs_clip_image = LazilyCompiledShader::new(ShaderKind::ClipCache,
+                                                      "cs_clip_image",
+                                                      max_clip_instances,
+                                                      &[],
+                                                      &mut device,
+                                                      options.precache_shaders);
         let cs_text_run = LazilyCompiledShader::new(ShaderKind::Cache,
                                                     "cs_text_run",
                                                     max_cache_instances,
@@ -478,18 +514,6 @@ impl Renderer {
                                              &mut device,
                                              &[],
                                              options.precache_shaders);
-        let ps_rectangle_clip = PrimitiveShader::new("ps_rectangle_clip",
-                                                     max_ubo_vectors,
-                                                     max_prim_instances,
-                                                     &mut device,
-                                                     &[],
-                                                     options.precache_shaders);
-        let ps_image_clip = PrimitiveShader::new("ps_image_clip",
-                                                 max_ubo_vectors,
-                                                 max_prim_instances,
-                                                 &mut device,
-                                                 &[],
-                                                 options.precache_shaders);
 
         let ps_box_shadow = PrimitiveShader::new("ps_box_shadow",
                                                  max_ubo_vectors,
@@ -504,12 +528,6 @@ impl Renderer {
                                                &mut device,
                                                &[],
                                                options.precache_shaders);
-        let ps_gradient_clip = PrimitiveShader::new("ps_gradient_clip",
-                                                    max_ubo_vectors,
-                                                    max_prim_instances,
-                                                    &mut device,
-                                                    &[],
-                                                    options.precache_shaders);
         let ps_angle_gradient = PrimitiveShader::new("ps_angle_gradient",
                                                      max_ubo_vectors,
                                                      max_prim_instances,
@@ -575,11 +593,6 @@ impl Renderer {
                              TextureFilter::Linear,
                              mask_pixels);
 
-        let dummy_resources = DummyResources {
-            white_image_id: white_image_id,
-            opaque_mask_image_id: dummy_mask_image_id,
-        };
-
         let debug_renderer = DebugRenderer::new(&mut device);
 
         let layer_texture = VertexDataTexture::new(&mut device);
@@ -645,7 +658,6 @@ impl Renderer {
                                                  result_tx,
                                                  device_pixel_ratio,
                                                  texture_cache,
-                                                 dummy_resources,
                                                  enable_aa,
                                                  backend_notifier,
                                                  context_handle,
@@ -665,6 +677,9 @@ impl Renderer {
             device_pixel_ratio: options.device_pixel_ratio,
             tile_clear_shader: tile_clear_shader,
             cs_box_shadow: cs_box_shadow,
+            cs_clip_clear: cs_clip_clear,
+            cs_clip_rectangle: cs_clip_rectangle,
+            cs_clip_image: cs_clip_image,
             cs_text_run: cs_text_run,
             cs_blur: cs_blur,
             ps_rectangle: ps_rectangle,
@@ -672,11 +687,8 @@ impl Renderer {
             ps_text_run_subpixel: ps_text_run_subpixel,
             ps_image: ps_image,
             ps_border: ps_border,
-            ps_rectangle_clip: ps_rectangle_clip,
-            ps_image_clip: ps_image_clip,
             ps_box_shadow: ps_box_shadow,
             ps_gradient: ps_gradient,
-            ps_gradient_clip: ps_gradient_clip,
             ps_angle_gradient: ps_angle_gradient,
             ps_cache_image: ps_cache_image,
             ps_blend: ps_blend,
@@ -985,8 +997,6 @@ impl Renderer {
             let texture_id = self.resolve_source_texture(&textures.colors[i]);
             self.device.bind_texture(TextureSampler::color(i), texture_id);
         }
-        let mask_texture_id = self.resolve_source_texture(&textures.mask);
-        self.device.bind_texture(TextureSampler::Mask, mask_texture_id);
 
         for chunk in ubo_data.chunks(max_prim_items) {
             let ubo = self.device.create_ubo(&chunk, UBO_BIND_DATA);
@@ -1014,24 +1024,24 @@ impl Renderer {
         self.device.set_blend(false);
         self.device.set_blend_mode_alpha();
         if let Some(cache_texture) = cache_texture {
-	        self.device.bind_texture(TextureSampler::Cache, cache_texture);
-	    }
+            self.device.bind_texture(TextureSampler::Cache, cache_texture);
+        }
 
         let (color, projection) = match render_target {
             Some(..) => (
                 [0.0, 0.0, 0.0, 0.0],
                 Matrix4D::ortho(0.0,
-                               target_size.width as f32,
+                               target_size.width,
                                0.0,
-                               target_size.height as f32,
+                               target_size.height,
                                ORTHO_NEAR_PLANE,
                                ORTHO_FAR_PLANE)
             ),
             None => (
                 [1.0, 1.0, 1.0, 1.0],
                 Matrix4D::ortho(0.0,
-                               target_size.width as f32,
-                               target_size.height as f32,
+                               target_size.width,
+                               target_size.height,
                                0.0,
                                ORTHO_NEAR_PLANE,
                                ORTHO_FAR_PLANE)
@@ -1080,15 +1090,56 @@ impl Renderer {
         // Draw any box-shadow caches for this target.
         if !target.box_shadow_cache_prims.is_empty() {
             self.device.set_blend(false);
-
             self.gpu_profile.add_marker(GPU_TAG_CACHE_BOX_SHADOW);
             let shader = self.cs_box_shadow.get(&mut self.device);
-            let max_cache_instances = self.max_cache_instances;
+            let max_prim_items = self.max_cache_instances;
             self.draw_ubo_batch(&target.box_shadow_cache_prims,
                                 shader,
                                 1,
                                 &BatchTextures::no_texture(),
-                                max_cache_instances,
+                                max_prim_items,
+                                &projection);
+        }
+
+        // Draw the clip items into the tiled alpha mask.
+        self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
+        // first, mark the target area as opaque
+        //Note: not needed if we know the target is cleared with opaque
+        self.device.set_blend(false);
+        if !target.clip_batcher.clears.is_empty() {
+            let shader = self.cs_clip_clear.get(&mut self.device);
+            let max_prim_items = self.max_clear_tiles;
+            self.draw_ubo_batch(&target.clip_batcher.clears,
+                                shader,
+                                1,
+                                &BatchTextures::no_texture(),
+                                max_prim_items,
+                                &projection);
+        }
+        // now switch to multiplicative blending
+        self.device.set_blend(true);
+        self.device.set_blend_mode_multiply();
+        let max_prim_items = self.max_cache_instances;
+        // draw rounded cornered rectangles
+        if !target.clip_batcher.rectangles.is_empty() {
+            let shader = self.cs_clip_rectangle.get(&mut self.device);
+            self.draw_ubo_batch(&target.clip_batcher.rectangles,
+                                shader,
+                                1,
+                                &BatchTextures::no_texture(),
+                                max_prim_items,
+                                &projection);
+        }
+        // draw image masks
+        for (mask_texture_id, items) in target.clip_batcher.images.iter() {
+            let texture_id = self.resolve_source_texture(mask_texture_id);
+            self.device.bind_texture(TextureSampler::Mask, texture_id);
+            let shader = self.cs_clip_image.get(&mut self.device);
+            self.draw_ubo_batch(items,
+                                shader,
+                                1,
+                                &BatchTextures::no_texture(),
+                                max_prim_items,
                                 &projection);
         }
 
@@ -1113,16 +1164,21 @@ impl Renderer {
                                 &projection);
         }
 
+        self.device.set_blend(false);
         let mut prev_blend_mode = BlendMode::None;
 
         for batch in &target.alpha_batcher.batches {
             let transform_kind = batch.key.flags.transform_kind();
-            let has_complex_clip = batch.key.flags.needs_clipping();
+            let needs_clipping = batch.key.flags.needs_clipping();
+            assert!(!needs_clipping || batch.key.blend_mode == BlendMode::Alpha);
 
             if batch.key.blend_mode != prev_blend_mode {
                 match batch.key.blend_mode {
-                    BlendMode::None | BlendMode::Alpha => {
-                        self.device.set_blend(batch.key.blend_mode == BlendMode::Alpha);
+                    BlendMode::None => {
+                        self.device.set_blend(false);
+                    }
+                    BlendMode::Alpha => {
+                        self.device.set_blend(true);
                         self.device.set_blend_mode_alpha();
                     }
                     BlendMode::Subpixel(color) => {
@@ -1171,13 +1227,8 @@ impl Renderer {
 
                 }
                 &PrimitiveBatchData::Rectangles(ref ubo_data) => {
-                    let (shader, max_prim_items) = if has_complex_clip {
-                        self.gpu_profile.add_marker(GPU_TAG_PRIM_RECT_CLIP);
-                        self.ps_rectangle_clip.get(&mut self.device, transform_kind)
-                    } else {
-                        self.gpu_profile.add_marker(GPU_TAG_PRIM_RECT);
-                        self.ps_rectangle.get(&mut self.device, transform_kind)
-                    };
+                    self.gpu_profile.add_marker(GPU_TAG_PRIM_RECT);
+                    let (shader, max_prim_items) = self.ps_rectangle.get(&mut self.device, transform_kind);
                     self.draw_ubo_batch(ubo_data,
                                         shader,
                                         1,
@@ -1186,13 +1237,8 @@ impl Renderer {
                                         &projection);
                 }
                 &PrimitiveBatchData::Image(ref ubo_data) => {
-                    let (shader, max_prim_items) = if has_complex_clip {
-                        self.gpu_profile.add_marker(GPU_TAG_PRIM_IMAGE_CLIP);
-                        self.ps_image_clip.get(&mut self.device, transform_kind)
-                    } else {
-                        self.gpu_profile.add_marker(GPU_TAG_PRIM_IMAGE);
-                        self.ps_image.get(&mut self.device, transform_kind)
-                    };
+                    self.gpu_profile.add_marker(GPU_TAG_PRIM_IMAGE);
+                    let (shader, max_prim_items) = self.ps_image.get(&mut self.device, transform_kind);
                     self.draw_ubo_batch(ubo_data,
                                         shader,
                                         1,
@@ -1234,13 +1280,8 @@ impl Renderer {
                                         &projection);
                 }
                 &PrimitiveBatchData::AlignedGradient(ref ubo_data) => {
-                    let (shader, max_prim_items) = if has_complex_clip {
-                        self.gpu_profile.add_marker(GPU_TAG_PRIM_GRADIENT_CLIP);
-                        self.ps_gradient_clip.get(&mut self.device, transform_kind)
-                    } else {
-                        self.gpu_profile.add_marker(GPU_TAG_PRIM_GRADIENT);
-                        self.ps_gradient.get(&mut self.device, transform_kind)
-                    };
+                    self.gpu_profile.add_marker(GPU_TAG_PRIM_GRADIENT);
+                    let (shader, max_prim_items) = self.ps_gradient.get(&mut self.device, transform_kind);
                     self.draw_ubo_batch(ubo_data,
                                         shader,
                                         1,
@@ -1365,19 +1406,15 @@ impl Renderer {
 
         // Clear tiles with no items
         if !frame.clear_tiles.is_empty() {
-            let tile_clear_shader = self.tile_clear_shader.get(&mut self.device);
-            self.device.bind_program(tile_clear_shader, &projection);
-            self.device.bind_vao(self.quad_vao_id);
-
-            for chunk in frame.clear_tiles.chunks(self.max_clear_tiles) {
-                let ubo = self.device.create_ubo(&chunk, UBO_BIND_DATA);
-
-                self.device.draw_indexed_triangles_instanced_u16(6, chunk.len() as i32);
-                self.profile_counters.vertices.add(6 * chunk.len());
-                self.profile_counters.draw_calls.inc();
-
-                self.device.delete_buffer(ubo);
-            }
+            self.device.set_blend(false);
+            let shader = self.tile_clear_shader.get(&mut self.device);
+            let max_prim_items = self.max_clear_tiles;
+            self.draw_ubo_batch(&frame.clear_tiles,
+                                shader,
+                                1,
+                                &BatchTextures::no_texture(),
+                                max_prim_items,
+                                &projection);
         }
     }
 
diff --git a/webrender/src/resource_cache.rs b/webrender/src/resource_cache.rs
index c439f21ca0..2de2bf5b2a 100644
--- a/webrender/src/resource_cache.rs
+++ b/webrender/src/resource_cache.rs
@@ -16,7 +16,7 @@ use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::fmt::Debug;
 use std::hash::BuildHasherDefault;
 use std::hash::Hash;
-use texture_cache::{TextureCache, TextureCacheItem, TextureCacheItemId};
+use texture_cache::{TextureCache, TextureCacheItemId};
 use webrender_traits::{Epoch, FontKey, GlyphKey, ImageKey, ImageFormat, ImageRendering};
 use webrender_traits::{FontRenderMode, GlyphDimensions, WebGLContextId};
 
@@ -67,12 +67,6 @@ enum State {
     QueryResources,
 }
 
-#[derive(Clone, Debug)]
-pub struct DummyResources {
-    pub white_image_id: TextureCacheItemId,
-    pub opaque_mask_image_id: TextureCacheItemId,
-}
-
 struct ImageResource {
     bytes: Vec<u8>,
     width: u32,
@@ -428,12 +422,6 @@ impl ResourceCache {
         }
     }
 
-    #[inline]
-    pub fn get_image_by_cache_id(&self, texture_cache_id: TextureCacheItemId)
-                                 -> &TextureCacheItem {
-        self.texture_cache.get(texture_cache_id)
-    }
-
     #[inline]
     pub fn get_webgl_texture(&self, context_id: &WebGLContextId) -> CacheItem {
         let webgl_texture = &self.webgl_textures[context_id];
diff --git a/webrender/src/tiling.rs b/webrender/src/tiling.rs
index d2ee1ba849..2b94f1ca22 100644
--- a/webrender/src/tiling.rs
+++ b/webrender/src/tiling.rs
@@ -12,20 +12,21 @@ use internal_types::{DeviceRect, DevicePoint, DeviceSize, DeviceLength, device_p
 use internal_types::{ANGLE_FLOAT_TO_FIXED, LowLevelFilterOp};
 use internal_types::{BatchTextures, CacheTextureId, SourceTexture};
 use layer::Layer;
+use mask_cache::{MaskCacheKey, MaskCacheInfo};
 use prim_store::{PrimitiveGeometry, RectanglePrimitive, PrimitiveContainer};
 use prim_store::{BorderPrimitiveCpu, BorderPrimitiveGpu, BoxShadowPrimitiveGpu};
 use prim_store::{ImagePrimitiveCpu, ImagePrimitiveGpu, ImagePrimitiveKind};
 use prim_store::{PrimitiveKind, PrimitiveIndex, PrimitiveMetadata};
-use prim_store::PrimitiveClipSource;
+use prim_store::{CLIP_DATA_GPU_SIZE, PrimitiveClipSource};
 use prim_store::{GradientPrimitiveCpu, GradientPrimitiveGpu, GradientType};
 use prim_store::{PrimitiveCacheKey, TextRunPrimitiveGpu, TextRunPrimitiveCpu};
 use prim_store::{PrimitiveStore, GpuBlock16, GpuBlock32, GpuBlock64, GpuBlock128};
 use profiler::FrameProfileCounters;
 use renderer::BlendMode;
-use resource_cache::{DummyResources, ResourceCache};
+use resource_cache::ResourceCache;
 use std::cmp;
 use std::collections::{HashMap};
-use std::f32;
+use std::{i32, f32};
 use std::mem;
 use std::hash::{BuildHasherDefault};
 use std::sync::atomic::{AtomicUsize, Ordering};
@@ -60,7 +61,7 @@ trait AlphaBatchHelpers {
                          prim_index: PrimitiveIndex,
                          batch: &mut PrimitiveBatch,
                          layer_index: StackingContextIndex,
-                         task_id: i32,
+                         task_index: i32,
                          render_tasks: &RenderTaskCollection,
                          pass_index: RenderPassIndex);
 }
@@ -150,13 +151,19 @@ impl AlphaBatchHelpers for PrimitiveStore {
                          device_pixel_ratio: f32) -> bool {
         let metadata = self.get_metadata(prim_index);
 
+        // bail out if the clip rectangle is outside of the tile
+        if let Some(ref clip_info) = metadata.clip_cache_info {
+            if !clip_info.device_rect.intersects(tile_rect) {
+                return false;
+            }
+        }
+
         match metadata.prim_kind {
             PrimitiveKind::Rectangle |
             PrimitiveKind::TextRun |
             PrimitiveKind::Image |
             PrimitiveKind::Gradient |
             PrimitiveKind::BoxShadow => true,
-
             PrimitiveKind::Border => {
                 let border = &self.cpu_borders[metadata.cpu_prim_index.0];
                 let inner_rect = TransformedRect::new(&border.inner_rect,
@@ -172,14 +179,21 @@ impl AlphaBatchHelpers for PrimitiveStore {
                          prim_index: PrimitiveIndex,
                          batch: &mut PrimitiveBatch,
                          layer_index: StackingContextIndex,
-                         task_id: i32,
+                         task_index: i32,
                          render_tasks: &RenderTaskCollection,
                          child_pass_index: RenderPassIndex) {
         let metadata = self.get_metadata(prim_index);
         let layer_index = layer_index.0 as i32;
         let global_prim_id = prim_index.0 as i32;
         let prim_address = metadata.gpu_prim_index;
-        let clip_address = metadata.clip_index.unwrap_or(GpuStoreAddress(0));
+        let clip_task_index = match metadata.clip_cache_info {
+            Some(ref clip_info) => {
+                let cache_task_id = RenderTaskId::Dynamic(RenderTaskKey::CacheMask(clip_info.key));
+                let cache_task_index = render_tasks.get_task_index(&cache_task_id, child_pass_index);
+                cache_task_index.0 as i32
+            },
+            None => i32::MAX, //sentinel value for the dummy mask
+        };
 
         match &mut batch.data {
             &mut PrimitiveBatchData::Blend(..) |
@@ -187,11 +201,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
 
             &mut PrimitiveBatchData::Rectangles(ref mut data) => {
                 data.push(PrimitiveInstance {
-                    task_id: task_id,
+                    task_index: task_index,
+                    clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
-                    clip_address: clip_address,
                     sub_index: 0,
                     user_data: [0, 0],
                 });
@@ -199,11 +213,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
             &mut PrimitiveBatchData::TextRun(ref mut data) => {
                 for glyph_index in 0..metadata.gpu_data_count {
                     data.push(PrimitiveInstance {
-                        task_id: task_id,
+                        task_index: task_index,
+                        clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
-                        clip_address: clip_address,
                         sub_index: metadata.gpu_data_address.0 + glyph_index,
                         user_data: [ 0, 0 ],
                     });
@@ -211,11 +225,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
             }
             &mut PrimitiveBatchData::Image(ref mut data) => {
                 data.push(PrimitiveInstance {
-                    task_id: task_id,
+                    task_index: task_index,
+                    clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
-                    clip_address: clip_address,
                     sub_index: 0,
                     user_data: [ 0, 0 ],
                 });
@@ -223,11 +237,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
             &mut PrimitiveBatchData::Borders(ref mut data) => {
                 for border_segment in 0..8 {
                     data.push(PrimitiveInstance {
-                        task_id: task_id,
+                        task_index: task_index,
+                        clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
-                        clip_address: clip_address,
                         sub_index: border_segment,
                         user_data: [ 0, 0 ],
                     });
@@ -236,11 +250,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
             &mut PrimitiveBatchData::AlignedGradient(ref mut data) => {
                 for part_index in 0..(metadata.gpu_data_count - 1) {
                     data.push(PrimitiveInstance {
-                        task_id: task_id,
+                        task_index: task_index,
+                        clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
-                        clip_address: clip_address,
                         sub_index: metadata.gpu_data_address.0 + part_index,
                         user_data: [ 0, 0 ],
                     });
@@ -248,11 +262,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
             }
             &mut PrimitiveBatchData::AngleGradient(ref mut data) => {
                 data.push(PrimitiveInstance {
-                    task_id: task_id,
+                    task_index: task_index,
+                    clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
-                    clip_address: clip_address,
                     sub_index: metadata.gpu_data_address.0,
                     user_data: [ metadata.gpu_data_count, 0 ],
                 });
@@ -267,11 +281,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
                                                                    child_pass_index);
 
                 data.push(PrimitiveInstance {
-                    task_id: task_id,
+                    task_index: task_index,
+                    clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
-                    clip_address: clip_address,
                     sub_index: 0,
                     user_data: [ cache_task_index.0 as i32, 0 ],
                 });
@@ -283,11 +297,11 @@ impl AlphaBatchHelpers for PrimitiveStore {
 
                 for rect_index in 0..metadata.gpu_data_count {
                     data.push(PrimitiveInstance {
-                        task_id: task_id,
+                        task_index: task_index,
+                        clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
-                        clip_address: clip_address,
                         sub_index: metadata.gpu_data_address.0 + rect_index,
                         user_data: [ cache_task_index.0 as i32, 0 ],
                     });
@@ -334,11 +348,13 @@ pub struct RenderTaskIndex(usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTaskKey {
-    // Draw this primitive to a cache target.
+    /// Draw this primitive to a cache target.
     CachePrimitive(PrimitiveCacheKey),
-    // Apply a vertical blur pass of given radius for this primitive.
+    /// Draw the tile alpha mask for a primitive.
+    CacheMask(MaskCacheKey),
+    /// Apply a vertical blur pass of given radius for this primitive.
     VerticalBlur(i32, PrimitiveIndex),
-    // Apply a horizontal blur pass of given radius for this primitive.
+    /// Apply a horizontal blur pass of given radius for this primitive.
     HorizontalBlur(i32, PrimitiveIndex),
 }
 
@@ -366,10 +382,11 @@ impl RenderTaskCollection {
         }
     }
 
-    fn add(&mut self, task: &RenderTask, pass: RenderPassIndex) {
+    fn add(&mut self, task: &RenderTask, pass: RenderPassIndex) -> RenderTaskIndex {
         match task.id {
             RenderTaskId::Static(index) => {
                 self.render_task_data[index.0] = task.write_task_data();
+                index
             }
             RenderTaskId::Dynamic(key) => {
                 let index = RenderTaskIndex(self.render_task_data.len());
@@ -384,6 +401,7 @@ impl RenderTaskCollection {
                     },
                 });
                 self.render_task_data.push(task.write_task_data());
+                index
             }
         }
     }
@@ -470,25 +488,23 @@ impl AlphaBatcher {
              child_pass_index: RenderPassIndex) {
         let mut batches: Vec<PrimitiveBatch> = vec![];
         for task in &mut self.tasks {
-            let task_index = render_tasks.get_static_task_index(&task.task_id);
-            let task_index = task_index.0 as i32;
+            let task_index = render_tasks.get_static_task_index(&task.task_id).0 as i32;
 
             let mut existing_batch_index = 0;
             for item in task.items.drain(..) {
-                let batch_key;
-                match item {
+                let batch_key = match item {
                     AlphaRenderItem::Composite(..) => {
-                        batch_key = AlphaBatchKey::composite();
+                        AlphaBatchKey::composite()
                     }
                     AlphaRenderItem::Blend(..) => {
-                        batch_key = AlphaBatchKey::blend();
+                        AlphaBatchKey::blend()
                     }
                     AlphaRenderItem::Primitive(sc_index, prim_index) => {
                         // See if this task fits into the tile UBO
                         let layer = &ctx.layer_store[sc_index.0];
                         let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                         let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
-                        let needs_clipping = prim_metadata.clip_index.is_some();
+                        let needs_clipping = prim_metadata.clip_cache_info.is_some();
                         let needs_blending = transform_kind == TransformedRectKind::Complex ||
                                              !prim_metadata.is_opaque ||
                                              needs_clipping;
@@ -506,15 +522,14 @@ impl AlphaBatcher {
 
                         let textures = BatchTextures {
                             colors: ctx.prim_store.get_color_textures(prim_metadata),
-                            mask: prim_metadata.mask_texture_id,
                         };
 
-                        batch_key = AlphaBatchKey::primitive(batch_kind,
-                                                             flags,
-                                                             blend_mode,
-                                                             textures);
+                        AlphaBatchKey::primitive(batch_kind,
+                                                 flags,
+                                                 blend_mode,
+                                                 textures)
                     }
-                }
+                };
 
                 while existing_batch_index < batches.len() &&
                         !batches[existing_batch_index].key.is_compatible_with(&batch_key) {
@@ -570,6 +585,58 @@ impl AlphaBatcher {
     }
 }
 
+/// Collects the `CacheClipInstance`s of cache mask tasks, separated into
+/// clears, rectangles and per-texture image instances.
+#[derive(Debug)]
+pub struct ClipBatcher {
+    pub clears: Vec<CacheClipInstance>,
+    pub rectangles: Vec<CacheClipInstance>,
+    /// Image mask instances, keyed by the `texture_id` of the cached mask image.
+    pub images: HashMap<SourceTexture, Vec<CacheClipInstance>>,
+}
+
+impl ClipBatcher {
+    fn new() -> ClipBatcher {
+        ClipBatcher {
+            clears: Vec::new(),
+            rectangles: Vec::new(),
+            images: HashMap::new(),
+        }
+    }
+
+    /// Queues the instances for one mask task: a `clears` entry, one
+    /// `rectangles` entry per item of `key.clip_range`, and an `images` entry
+    /// when both `key.image` and `task_info.image` are present.
+    fn add(&mut self,
+           task_index: i32,
+           key: &MaskCacheKey,
+           task_info: &CacheMaskTask,
+           resource_cache: &ResourceCache) {
+        // Instances of one task differ only in the GPU store address they carry.
+        let layer_index = key.layer_id.0 as i32;
+        let instance = |address: GpuStoreAddress| CacheClipInstance {
+            task_id: task_index,
+            layer_index: layer_index,
+            address: address,
+            pad: 0,
+        };
+
+        // TODO: don't draw clipping instances covering the whole tile
+        self.clears.push(instance(GpuStoreAddress(0)));
+        let first = key.clip_range.start.0;
+        self.rectangles.extend((0 .. key.clip_range.item_count as usize).map(|region_id| {
+            instance(GpuStoreAddress(first + ((CLIP_DATA_GPU_SIZE * region_id) as i32)))
+        }));
+        if let (Some(address), Some(mask_key)) = (key.image, task_info.image) {
+            let cache_item = resource_cache.get_image(mask_key, ImageRendering::Auto);
+            self.images.entry(cache_item.texture_id)
+                       .or_insert_with(Vec::new)
+                       .push(instance(address));
+        }
+    }
+}
+
+
 struct CompileTileContext<'a> {
     layer_store: &'a [StackingContext],
     prim_store: &'a PrimitiveStore,
@@ -579,11 +646,13 @@ struct CompileTileContext<'a> {
 struct RenderTargetContext<'a> {
     layer_store: &'a [StackingContext],
     prim_store: &'a PrimitiveStore,
+    resource_cache: &'a ResourceCache,
 }
 
 /// A render target represents a number of rendering operations on a surface.
 pub struct RenderTarget {
     pub alpha_batcher: AlphaBatcher,
+    pub clip_batcher: ClipBatcher,
     pub box_shadow_cache_prims: Vec<CachePrimitiveInstance>,
     // List of text runs to be cached to this render target.
     // TODO(gw): For now, assume that these all come from
@@ -605,6 +674,7 @@ impl RenderTarget {
     fn new() -> RenderTarget {
         RenderTarget {
             alpha_batcher: AlphaBatcher::new(),
+            clip_batcher: ClipBatcher::new(),
             box_shadow_cache_prims: Vec::new(),
             text_run_cache_prims: Vec::new(),
             text_run_textures: BatchTextures::no_texture(),
@@ -686,7 +756,6 @@ impl RenderTarget {
                         // we switch the texture atlas to use texture layers!
                         let textures = BatchTextures {
                             colors: ctx.prim_store.get_color_textures(prim_metadata),
-                            mask: prim_metadata.mask_texture_id,
                         };
 
                         debug_assert!(textures.colors[0] != SourceTexture::Invalid);
@@ -709,6 +778,14 @@ impl RenderTarget {
                     }
                 }
             }
+            RenderTaskKind::CacheMask(ref task_info) => {
+                let key = match task.id {
+                    RenderTaskId::Dynamic(RenderTaskKey::CacheMask(ref key)) => key,
+                    _ => unreachable!()
+                };
+                let task_index = render_tasks.get_task_index(&task.id, pass_index).0 as i32;
+                self.clip_batcher.add(task_index, key, task_info, ctx.resource_cache);
+            }
         }
     }
 }
@@ -731,7 +808,7 @@ impl RenderPass {
             pass_index: RenderPassIndex(pass_index),
             is_framebuffer: is_framebuffer,
             targets: vec![ RenderTarget::new() ],
-            tasks: Vec::new(),
+            tasks: vec![],
         }
     }
 
@@ -767,7 +844,6 @@ impl RenderPass {
             match task.location {
                 RenderTaskLocation::Fixed(..) => {}
                 RenderTaskLocation::Dynamic(ref mut origin, ref size) => {
-
                     // See if this task is a duplicate from another tile.
                     // If so, just skip adding it!
                     match task.id {
@@ -827,10 +903,17 @@ pub struct AlphaRenderTask {
     items: Vec<AlphaRenderItem>,
 }
 
+#[derive(Debug, Clone)]
+pub struct CacheMaskTask {
+    actual_rect: DeviceRect,
+    image: Option<ImageKey>,
+}
+
 #[derive(Debug, Clone)]
 pub enum RenderTaskKind {
     Alpha(AlphaRenderTask),
     CachePrimitive(PrimitiveIndex),
+    CacheMask(CacheMaskTask),
     VerticalBlur(DeviceLength, PrimitiveIndex),
     HorizontalBlur(DeviceLength, PrimitiveIndex),
 }
@@ -862,8 +945,8 @@ impl RenderTask {
     }
 
     pub fn new_prim_cache(key: PrimitiveCacheKey,
-                      size: DeviceSize,
-                      prim_index: PrimitiveIndex) -> RenderTask {
+                          size: DeviceSize,
+                          prim_index: PrimitiveIndex) -> RenderTask {
         RenderTask {
             id: RenderTaskId::Dynamic(RenderTaskKey::CachePrimitive(key)),
             children: Vec::new(),
@@ -872,6 +955,23 @@ impl RenderTask {
         }
     }
 
+    /// Creates the mask task for `cache_info`; `None` if `actual_rect` misses its device rect.
+    fn new_mask(actual_rect: DeviceRect, cache_info: &MaskCacheInfo) -> Option<RenderTask> {
+        //CLIP TODO: handle a case where the tile is completely inside the intersection
+        if !actual_rect.intersects(&cache_info.device_rect) {
+            return None;
+        }
+        Some(RenderTask {
+            id: RenderTaskId::Dynamic(RenderTaskKey::CacheMask(cache_info.key)),
+            children: Vec::new(),
+            location: RenderTaskLocation::Dynamic(None, cache_info.device_rect.size),
+            kind: RenderTaskKind::CacheMask(CacheMaskTask {
+                actual_rect: cache_info.device_rect,
+                image: cache_info.image.map(|mask| mask.image),
+            }),
+        })
+    }
+
     // Construct a render task to apply a blur to a primitive. For now,
     // this is only used for text runs, but we can probably extend this
     // to handle general blurs to any render task in the future.
@@ -919,6 +1019,7 @@ impl RenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref mut task) => task,
             RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::HorizontalBlur(..) => unreachable!(),
         }
@@ -928,12 +1029,10 @@ impl RenderTask {
     // of render task that is provided to the GPU shaders
     // via a vertex texture.
     fn write_task_data(&self) -> RenderTaskData {
+        let (target_rect, target_index) = self.get_target_rect();
         match self.kind {
             RenderTaskKind::Alpha(ref task) => {
-                let (target_rect, target_index) = self.get_target_rect();
-                debug_assert!(target_rect.size.width == task.actual_rect.size.width);
-                debug_assert!(target_rect.size.height == task.actual_rect.size.height);
-
+                debug_assert_eq!(target_rect.size, task.actual_rect.size);
                 RenderTaskData {
                     data: [
                         task.actual_rect.origin.x as f32,
@@ -948,8 +1047,6 @@ impl RenderTask {
                 }
             }
             RenderTaskKind::CachePrimitive(..) => {
-                let (target_rect, target_index) = self.get_target_rect();
-
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
@@ -963,10 +1060,23 @@ impl RenderTask {
                     ],
                 }
             }
+            RenderTaskKind::CacheMask(ref task) => {
+                debug_assert_eq!(target_rect.size, task.actual_rect.size);
+                RenderTaskData {
+                    data: [
+                        target_rect.origin.x as f32,
+                        target_rect.origin.y as f32,
+                        (target_rect.origin.x + target_rect.size.width) as f32,
+                        (target_rect.origin.y + target_rect.size.height) as f32,
+                        task.actual_rect.origin.x as f32,
+                        task.actual_rect.origin.y as f32,
+                        target_index.0 as f32,
+                        0.0,
+                    ],
+                }
+            }
             RenderTaskKind::VerticalBlur(blur_radius, _) |
             RenderTaskKind::HorizontalBlur(blur_radius, _) => {
-                let (target_rect, target_index) = self.get_target_rect();
-
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
@@ -1118,8 +1228,7 @@ impl AlphaBatchKey {
             self.blend_mode == other.blend_mode &&
             textures_compatible(self.textures.colors[0], other.textures.colors[0]) &&
             textures_compatible(self.textures.colors[1], other.textures.colors[1]) &&
-            textures_compatible(self.textures.colors[2], other.textures.colors[2]) &&
-            textures_compatible(self.textures.mask, other.textures.mask)
+            textures_compatible(self.textures.colors[2], other.textures.colors[2])
     }
 }
 
@@ -1152,13 +1261,24 @@ pub struct CachePrimitiveInstance {
     sub_index: i32,
 }
 
+/// An instance drawn into the clipping mask: a clear, a rectangle or an
+/// image, which defines the way `address` is treated (clears pass address 0).
+/// NOTE(review): `pad` is only ever written as 0 in this patch -- presumably padding.
+#[derive(Debug)]
+pub struct CacheClipInstance {
+    task_id: i32,
+    layer_index: i32,
+    address: GpuStoreAddress,
+    pad: i32,
+}
+
 #[derive(Debug, Clone)]
 pub struct PrimitiveInstance {
     global_prim_id: i32,
     prim_address: GpuStoreAddress,
-    task_id: i32,
+    task_index: i32,
+    clip_task_index: i32,
     layer_index: i32,
-    clip_address: GpuStoreAddress,
     sub_index: i32,
     user_data: [i32; 2],
 }
@@ -1406,7 +1526,6 @@ pub struct FrameBuilder {
     prim_store: PrimitiveStore,
     cmds: Vec<PrimitiveRunCmd>,
     device_pixel_ratio: f32,
-    dummy_resources: DummyResources,
     debug: bool,
     config: FrameBuilderConfig,
 
@@ -1601,13 +1720,20 @@ impl ScreenTile {
                         // If an opaque primitive covers a tile entirely, we can discard
                         // all primitives underneath it.
                         if layer.xf_rect.as_ref().unwrap().kind == TransformedRectKind::AxisAligned &&
-                           prim_metadata.clip_index.is_none() &&
+                           prim_metadata.clip_cache_info.is_none() &&
                            prim_metadata.is_opaque &&
                            prim_bounding_rect.as_ref().unwrap().contains_rect(&self.rect) {
                             current_task.as_alpha_batch().items.clear();
                         }
                     }
 
+                    // Add a task to render the updated image mask
+                    if let Some(ref clip_info) = prim_metadata.clip_cache_info {
+                        let mask_task = RenderTask::new_mask(self.rect, clip_info)
+                                                   .expect("Primitive should have been culled by `prim_affects_tile` already");
+                        current_task.children.push(mask_task);
+                    }
+
                     // Add any dynamic render tasks needed to render this primitive
                     if let Some(ref render_task) = prim_metadata.render_task {
                         current_task.children.push(render_task.clone());
@@ -1635,7 +1761,6 @@ impl ScreenTile {
 impl FrameBuilder {
     pub fn new(viewport_size: Size2D<f32>,
                device_pixel_ratio: f32,
-               dummy_resources: DummyResources,
                debug: bool,
                config: FrameBuilderConfig) -> FrameBuilder {
         let viewport_size = Size2D::new(viewport_size.width as i32, viewport_size.height as i32);
@@ -1645,7 +1770,6 @@ impl FrameBuilder {
             prim_store: PrimitiveStore::new(device_pixel_ratio),
             cmds: Vec::new(),
             device_pixel_ratio: device_pixel_ratio,
-            dummy_resources: dummy_resources,
             debug: debug,
             packed_layers: Vec::new(),
             scrollbar_prims: Vec::new(),
@@ -1657,8 +1781,23 @@ impl FrameBuilder {
                      rect: &Rect<f32>,
                      clip_region: &ClipRegion,
                      container: PrimitiveContainer) -> PrimitiveIndex {
-        let prim_index = self.prim_store.add_primitive(rect,
-                                                       clip_region,
+
+        let geometry = PrimitiveGeometry {
+            local_rect: *rect,
+            local_clip_rect: clip_region.main,
+        };
+        let clip_source = if clip_region.is_complex() {
+            PrimitiveClipSource::Region(clip_region.clone())
+        } else {
+            PrimitiveClipSource::NoClip
+        };
+        let clip_info = MaskCacheInfo::new(&clip_source,
+                                           StackingContextIndex(self.layer_store.len() - 1),
+                                           &mut self.prim_store.gpu_data32);
+
+        let prim_index = self.prim_store.add_primitive(geometry,
+                                                       Box::new(clip_source),
+                                                       clip_info,
                                                        container);
 
         match self.cmds.last_mut().unwrap() {
@@ -2065,10 +2204,6 @@ impl FrameBuilder {
 
         // TODO(gw): Remove this stack once the layers refactor is done!
         let mut layer_stack: Vec<StackingContextIndex> = Vec::new();
-        let dummy_mask_cache_item = {
-            let opaque_mask_id = self.dummy_resources.opaque_mask_image_id;
-            resource_cache.get_image_by_cache_id(opaque_mask_id).clone()
-        };
 
         for cmd in &self.cmds {
             match cmd {
@@ -2080,15 +2215,15 @@ impl FrameBuilder {
                     layer.xf_rect = None;
                     layer.tile_range = None;
 
-                    if !layer.can_contribute_to_scene() {
-                        continue;
-                    }
-
                     let scroll_layer = &layer_map[&layer.scroll_layer_id];
                     packed_layer.transform = scroll_layer.world_content_transform
                                                          .pre_mul(&layer.local_transform);
                     packed_layer.inv_transform = packed_layer.transform.inverse().unwrap();
 
+                    if !layer.can_contribute_to_scene() {
+                        continue;
+                    }
+
                     let inv_layer_transform = layer.local_transform.inverse().unwrap();
                     let local_viewport_rect = scroll_layer.combined_local_viewport_rect;
                     let viewport_rect = inv_layer_transform.transform_rect(&local_viewport_rect);
@@ -2154,8 +2289,9 @@ impl FrameBuilder {
 
                             if self.prim_store.prepare_prim_for_render(prim_index,
                                                                        resource_cache,
+                                                                       &packed_layer.transform,
+                                                                       &packed_layer.local_clip_rect,
                                                                        self.device_pixel_ratio,
-                                                                       &dummy_mask_cache_item,
                                                                        auxiliary_lists) {
                                 self.prim_store.build_bounding_rect(prim_index,
                                                                     screen_rect,
@@ -2434,6 +2570,7 @@ impl FrameBuilder {
             let ctx = RenderTargetContext {
                 layer_store: &self.layer_store,
                 prim_store: &self.prim_store,
+                resource_cache: resource_cache,
             };
 
             // Do the allocations now, assigning each tile's tasks to a render
diff --git a/webrender_traits/src/types.rs b/webrender_traits/src/types.rs
index 81784882d2..03315984f6 100644
--- a/webrender_traits/src/types.rs
+++ b/webrender_traits/src/types.rs
@@ -324,7 +324,7 @@ pub enum ImageRendering {
     Pixelated,
 }
 
-#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
+#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub struct ItemRange {
     pub start: usize,
     pub length: usize,