@@ -609,35 +609,95 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * flush the object from the CPU cache.
  */
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    int *needs_clflush)
+				    unsigned int *needs_clflush)
 {
 	int ret;
 
 	*needs_clflush = 0;
 
-	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-		return -EINVAL;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
 
 	ret = i915_gem_object_wait_rendering(obj, true);
 	if (ret)
 		return ret;
 
-	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
-		/* If we're not in the cpu read domain, set ourself into the gtt
-		 * read domain and manually flush cachelines (if required). This
-		 * optimizes for the case when the gpu will dirty the data
-		 * anyway again before the next pread happens. */
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 							obj->cache_level);
+
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
+	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
+			return ret;
+		}
+		*needs_clflush = 0;
 	}
 
+	return 0;
+}
+
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+				     unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+	/* Same trick applies to invalidate partially written cachelines read
+	 * before writing.
+	 */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+							 obj->cache_level);
+
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
 		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
-	return ret;
+	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
+			return ret;
+		}
+		*needs_clflush = 0;
+	}
+
+	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+		obj->cache_dirty = true;
+
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->dirty = 1;
+	return 0;
 }
 
 /* Per-page copy function for the shmem pread fastpath.
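Note: the prepare helpers report their result through a small bitmask and are paired with an i915_gem_obj_finish_shmem_access() teardown, but neither the flag values nor that helper appear in this hunk. The sketch below shows what they presumably look like, inferred only from how the diff uses them (the `cpu_write_needs_clflush(obj) << 1` shift and the plain boolean OR fix the bit positions); the real definitions live in a header touched elsewhere in the patch.

/* Sketch, not taken from this hunk: flag values inferred from the "<< 1"
 * shift (flush after the write) and the bare boolean OR (flush before).
 */
#define CLFLUSH_BEFORE	0x1	/* flush stale cachelines before the CPU access */
#define CLFLUSH_AFTER	0x2	/* flush dirty cachelines after the CPU access */

/* Presumed counterpart to the prepare helpers: drops the page pin they took. */
static inline void
i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}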
@@ -872,19 +932,14 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int needs_clflush = 0;
 	struct sg_page_iter sg_iter;
 
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	user_data = u64_to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
+	remain = args->size;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
@@ -940,7 +995,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
+	i915_gem_obj_finish_shmem_access(obj);
 
 	return ret;
 }
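With the `out:` label switched over, every shmem CPU access now follows the same prepare / access / finish pattern. A minimal caller sketch, assuming the API above; the helper read_first_page() is hypothetical and only for illustration, not part of the patch.

/* Hypothetical example: copy the first page of an object through the CPU
 * using the new prepare/finish pair.
 */
static int read_first_page(struct drm_i915_gem_object *obj, void *dst)
{
	unsigned int needs_clflush;
	struct page *page;
	void *vaddr;
	int ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;	/* e.g. -ENODEV for objects without struct pages */

	page = sg_page(obj->pages->sgl);
	vaddr = kmap(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr, PAGE_SIZE);	/* drop stale cachelines */
	memcpy(dst, vaddr, PAGE_SIZE);
	kunmap(page);

	i915_gem_obj_finish_shmem_access(obj);	/* unpin the pages again */
	return 0;
}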
@@ -1248,42 +1303,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int shmem_page_offset, page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int hit_slowpath = 0;
-	int needs_clflush_after = 0;
-	int needs_clflush_before = 0;
+	unsigned int needs_clflush;
 	struct sg_page_iter sg_iter;
 
-	user_data = u64_to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-		/* If we're not in the cpu write domain, set ourself into the gtt
-		 * write domain and manually flush cachelines (if required). This
-		 * optimizes for the case when the gpu will use the data
-		 * right away and we therefore have to clflush anyway. */
-		needs_clflush_after = cpu_write_needs_clflush(obj);
-	}
-	/* Same trick applies to invalidate partially written cachelines read
-	 * before writing. */
-	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
-		needs_clflush_before =
-			!cpu_cache_is_coherent(dev, obj->cache_level);
-
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
-
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
-	i915_gem_object_pin_pages(obj);
-
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
-	obj->dirty = 1;
+	remain = args->size;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
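For review purposes: the single needs_clflush bitmask filled in by i915_gem_obj_prepare_shmem_write() carries the same information as the two ints removed above. A small sketch of the encoding, assuming the flag values inferred earlier; the helper name is made up for illustration.

/* Illustration only: how the old pair of ints maps onto the new bitmask. */
static unsigned int example_encode_clflush(bool clflush_before, bool clflush_after)
{
	unsigned int needs_clflush = 0;

	if (clflush_before)		/* was: needs_clflush_before */
		needs_clflush |= CLFLUSH_BEFORE;
	if (clflush_after)		/* was: needs_clflush_after, hence the "<< 1" */
		needs_clflush |= CLFLUSH_AFTER;

	return needs_clflush;
}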
@@ -1307,7 +1337,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		/* If we don't overwrite a cacheline completely we need to be
 		 * careful to have up-to-date data by first clflushing. Don't
 		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
+		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
 
@@ -1317,7 +1347,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
-					needs_clflush_after);
+					needs_clflush & CLFLUSH_AFTER);
 		if (ret == 0)
 			goto next_page;
 
@@ -1326,7 +1356,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
-					needs_clflush_after);
+					needs_clflush & CLFLUSH_AFTER);
 
 		mutex_lock(&dev->struct_mutex);
 
@@ -1340,25 +1370,23 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
+	i915_gem_obj_finish_shmem_access(obj);
 
 	if (hit_slowpath) {
 		/*
 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
 		 * cachelines in-line while writing and the object moved
 		 * out of the cpu write domain while we've dropped the lock.
 		 */
-		if (!needs_clflush_after &&
+		if (!(needs_clflush & CLFLUSH_AFTER) &&
 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			if (i915_gem_clflush_object(obj, obj->pin_display))
-				needs_clflush_after = true;
+				needs_clflush |= CLFLUSH_AFTER;
 		}
 	}
 
-	if (needs_clflush_after)
+	if (needs_clflush & CLFLUSH_AFTER)
 		i915_gem_chipset_flush(to_i915(dev));
-	else
-		obj->cache_dirty = true;
 
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 	return ret;
@@ -1437,10 +1465,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
-		else if (i915_gem_object_has_struct_page(obj))
-			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 		else
-			ret = -ENODEV;
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 	}
 
 	i915_gem_object_put(obj);