
Commit 43394c7

drm/i915: Extract i915_gem_obj_prepare_shmem_write()
This is a companion to i915_gem_obj_prepare_shmem_read() that prepares
the backing storage for direct writes. It first serialises with the GPU,
pins the backing storage and then indicates what clflushes are required
in order for the writes to be coherent.

Whilst here, fix support for ancient CPUs without clflush for which we
cannot do the GTT+clflush tricks.

v2: Add i915_gem_obj_finish_shmem_access() for symmetry

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Joonas Lahtinen <[email protected]>
Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 31a3920 commit 43394c7
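
For context, the two helpers are meant to bracket direct CPU access to an object's shmem backing pages: prepare serialises with the GPU, pins the pages and reports which clflushes are needed, while finish drops the pin again. The snippet below is a minimal, illustrative sketch of that call pattern; example_cpu_write() and its arguments are invented for illustration, and only the i915_gem_obj_* helpers and the CLFLUSH_* flags come from this patch (see the hunks below).

/* Illustrative only: how a caller is expected to use the new helpers.
 * Assumes it lives in i915_gem.c where these symbols are visible.
 */
static int example_cpu_write(struct drm_i915_gem_object *obj,
                             struct page *page, const void *src, size_t len)
{
        unsigned int needs_clflush;
        void *vaddr;
        int ret;

        /* Serialise with the GPU, pin the backing storage and learn
         * which cache flushes are needed for coherent CPU writes. */
        ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        if (ret)
                return ret;

        vaddr = kmap(page);
        if (needs_clflush & CLFLUSH_BEFORE)     /* invalidate stale cachelines */
                drm_clflush_virt_range(vaddr, len);

        memcpy(vaddr, src, len);

        if (needs_clflush & CLFLUSH_AFTER)      /* push the writes out to memory */
                drm_clflush_virt_range(vaddr, len);
        kunmap(page);

        /* Symmetric unpin added in v2 of the patch. */
        i915_gem_obj_finish_shmem_access(obj);
        return 0;
}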

3 files changed: 102 additions & 65 deletions

drivers/gpu/drm/i915/i915_cmd_parser.c

Lines changed: 2 additions & 2 deletions
@@ -973,7 +973,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
                       u32 batch_start_offset,
                       u32 batch_len)
 {
-        int needs_clflush = 0;
+        unsigned int needs_clflush;
         void *src_base, *src;
         void *dst = NULL;
         int ret;
@@ -1020,7 +1020,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 unmap_src:
         vunmap(src_base);
 unpin_src:
-        i915_gem_object_unpin_pages(src_obj);
+        i915_gem_obj_finish_shmem_access(src_obj);

         return ret ? ERR_PTR(ret) : dst;
 }

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 14 additions & 3 deletions
@@ -3098,9 +3098,6 @@ int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);

-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-                                    int *needs_clflush);
-
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);

 static inline int __sg_page_count(struct scatterlist *sg)
@@ -3201,6 +3198,20 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
         i915_gem_object_unpin_pages(obj);
 }

+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+                                    unsigned int *needs_clflush);
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+                                     unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE 0x1
+#define CLFLUSH_AFTER 0x2
+#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
+{
+        i915_gem_object_unpin_pages(obj);
+}
+
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
                          struct drm_i915_gem_request *to);
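
Note that needs_clflush is now a bitmask rather than a boolean: bit 0 (CLFLUSH_BEFORE) requests a flush before the CPU touches the pages, and bit 1 (CLFLUSH_AFTER) requests a flush after CPU writes, which is why i915_gem_obj_prepare_shmem_write() shifts cpu_write_needs_clflush() left by one. A purely illustrative decoder, not part of the patch:

/* Illustrative only: decode the flag bits returned via *needs_clflush. */
static void example_describe_clflush(unsigned int needs_clflush)
{
        if (needs_clflush & CLFLUSH_BEFORE)
                DRM_DEBUG("flush cachelines before CPU access (stale data)\n");
        if (needs_clflush & CLFLUSH_AFTER)
                DRM_DEBUG("flush cachelines after CPU writes (GPU must see them)\n");
        if (!(needs_clflush & CLFLUSH_FLAGS))
                DRM_DEBUG("object is coherent; no clflush required\n");
}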

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 86 additions & 60 deletions
@@ -609,35 +609,95 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * flush the object from the CPU cache.
  */
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-                                    int *needs_clflush)
+                                    unsigned int *needs_clflush)
 {
         int ret;

         *needs_clflush = 0;

-        if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-                return -EINVAL;
+        if (!i915_gem_object_has_struct_page(obj))
+                return -ENODEV;

         ret = i915_gem_object_wait_rendering(obj, true);
         if (ret)
                 return ret;

-        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
-                /* If we're not in the cpu read domain, set ourself into the gtt
-                 * read domain and manually flush cachelines (if required). This
-                 * optimizes for the case when the gpu will dirty the data
-                 * anyway again before the next pread happens. */
+        /* If we're not in the cpu read domain, set ourself into the gtt
+         * read domain and manually flush cachelines (if required). This
+         * optimizes for the case when the gpu will dirty the data
+         * anyway again before the next pread happens.
+         */
+        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                         obj->cache_level);
+
+        ret = i915_gem_object_get_pages(obj);
+        if (ret)
+                return ret;
+
+        i915_gem_object_pin_pages(obj);
+
+        if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+                ret = i915_gem_object_set_to_cpu_domain(obj, false);
+                if (ret) {
+                        i915_gem_object_unpin_pages(obj);
+                        return ret;
+                }
+                *needs_clflush = 0;
         }

+        return 0;
+}
+
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+                                     unsigned int *needs_clflush)
+{
+        int ret;
+
+        *needs_clflush = 0;
+        if (!i915_gem_object_has_struct_page(obj))
+                return -ENODEV;
+
+        ret = i915_gem_object_wait_rendering(obj, false);
+        if (ret)
+                return ret;
+
+        /* If we're not in the cpu write domain, set ourself into the
+         * gtt write domain and manually flush cachelines (as required).
+         * This optimizes for the case when the gpu will use the data
+         * right away and we therefore have to clflush anyway.
+         */
+        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+                *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+        /* Same trick applies to invalidate partially written cachelines read
+         * before writing.
+         */
+        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+                *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+                                                         obj->cache_level);
+
         ret = i915_gem_object_get_pages(obj);
         if (ret)
                 return ret;

         i915_gem_object_pin_pages(obj);

-        return ret;
+        if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+                ret = i915_gem_object_set_to_cpu_domain(obj, true);
+                if (ret) {
+                        i915_gem_object_unpin_pages(obj);
+                        return ret;
+                }
+                *needs_clflush = 0;
+        }
+
+        if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+                obj->cache_dirty = true;
+
+        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+        obj->dirty = 1;
+        return 0;
 }

 /* Per-page copy function for the shmem pread fastpath.
@@ -872,19 +932,14 @@ i915_gem_shmem_pread(struct drm_device *dev,
         int needs_clflush = 0;
         struct sg_page_iter sg_iter;

-        if (!i915_gem_object_has_struct_page(obj))
-                return -ENODEV;
-
-        user_data = u64_to_user_ptr(args->data_ptr);
-        remain = args->size;
-
-        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
         if (ret)
                 return ret;

+        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+        user_data = u64_to_user_ptr(args->data_ptr);
         offset = args->offset;
+        remain = args->size;

         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                          offset >> PAGE_SHIFT) {
@@ -940,7 +995,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
         }

 out:
-        i915_gem_object_unpin_pages(obj);
+        i915_gem_obj_finish_shmem_access(obj);

         return ret;
 }
@@ -1248,42 +1303,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
         int shmem_page_offset, page_length, ret = 0;
         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
         int hit_slowpath = 0;
-        int needs_clflush_after = 0;
-        int needs_clflush_before = 0;
+        unsigned int needs_clflush;
         struct sg_page_iter sg_iter;

-        user_data = u64_to_user_ptr(args->data_ptr);
-        remain = args->size;
-
-        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
-        ret = i915_gem_object_wait_rendering(obj, false);
+        ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
         if (ret)
                 return ret;

-        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-                /* If we're not in the cpu write domain, set ourself into the gtt
-                 * write domain and manually flush cachelines (if required). This
-                 * optimizes for the case when the gpu will use the data
-                 * right away and we therefore have to clflush anyway. */
-                needs_clflush_after = cpu_write_needs_clflush(obj);
-        }
-        /* Same trick applies to invalidate partially written cachelines read
-         * before writing. */
-        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
-                needs_clflush_before =
-                        !cpu_cache_is_coherent(dev, obj->cache_level);
-
-        ret = i915_gem_object_get_pages(obj);
-        if (ret)
-                return ret;
-
-        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
-        i915_gem_object_pin_pages(obj);
-
+        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+        user_data = u64_to_user_ptr(args->data_ptr);
         offset = args->offset;
-        obj->dirty = 1;
+        remain = args->size;

         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                          offset >> PAGE_SHIFT) {
@@ -1307,7 +1337,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 /* If we don't overwrite a cacheline completely we need to be
                  * careful to have up-to-date data by first clflushing. Don't
                  * overcomplicate things and flush the entire patch. */
-                partial_cacheline_write = needs_clflush_before &&
+                partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
                         ((shmem_page_offset | page_length)
                                 & (boot_cpu_data.x86_clflush_size - 1));

@@ -1317,7 +1347,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
                                         user_data, page_do_bit17_swizzling,
                                         partial_cacheline_write,
-                                        needs_clflush_after);
+                                        needs_clflush & CLFLUSH_AFTER);
                 if (ret == 0)
                         goto next_page;

@@ -1326,7 +1356,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
                                         user_data, page_do_bit17_swizzling,
                                         partial_cacheline_write,
-                                        needs_clflush_after);
+                                        needs_clflush & CLFLUSH_AFTER);

                 mutex_lock(&dev->struct_mutex);

@@ -1340,25 +1370,23 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
         }

 out:
-        i915_gem_object_unpin_pages(obj);
+        i915_gem_obj_finish_shmem_access(obj);

         if (hit_slowpath) {
                 /*
                  * Fixup: Flush cpu caches in case we didn't flush the dirty
                  * cachelines in-line while writing and the object moved
                  * out of the cpu write domain while we've dropped the lock.
                  */
-                if (!needs_clflush_after &&
+                if (!(needs_clflush & CLFLUSH_AFTER) &&
                     obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                         if (i915_gem_clflush_object(obj, obj->pin_display))
-                                needs_clflush_after = true;
+                                needs_clflush |= CLFLUSH_AFTER;
                 }
         }

-        if (needs_clflush_after)
+        if (needs_clflush & CLFLUSH_AFTER)
                 i915_gem_chipset_flush(to_i915(dev));
-        else
-                obj->cache_dirty = true;

         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
         return ret;
@@ -1437,10 +1465,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
         if (ret == -EFAULT || ret == -ENOSPC) {
                 if (obj->phys_handle)
                         ret = i915_gem_phys_pwrite(obj, args, file);
-                else if (i915_gem_object_has_struct_page(obj))
-                        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
                 else
-                        ret = -ENODEV;
+                        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
         }

         i915_gem_object_put(obj);
0 commit comments

Comments
 (0)