@@ -3352,6 +3352,58 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
3352
3352
return phi;
3353
3353
}
3354
3354
3355
// Describes one masked-memcmp range used when comparing two bits values whose
// layout contains padding. A range is `nrepeats` equally shaped chunks laid
// out back to back; each chunk is `data_bytes` of compared data followed by
// `padding_bytes` that are skipped.
struct egal_desc {
    size_t offset;        // byte offset of the first chunk from the start of the value
    size_t nrepeats;      // number of consecutive chunks; 0 means "no range recorded yet"
    size_t data_bytes;    // bytes passed to memcmp for each chunk
    size_t padding_bytes; // bytes skipped after each chunk (stride = data_bytes + padding_bytes)
};
3361
+
3362
+ template <typename callback>
3363
+ static size_t emit_masked_bits_compare (callback &emit_desc, jl_datatype_t *aty, egal_desc ¤t_desc)
3364
+ {
3365
+ // Memcmp, but with masked padding
3366
+ size_t data_bytes = 0 ;
3367
+ size_t padding_bytes = 0 ;
3368
+ size_t nfields = jl_datatype_nfields (aty);
3369
+ size_t total_size = jl_datatype_size (aty);
3370
+ for (size_t i = 0 ; i < nfields; ++i) {
3371
+ size_t offset = jl_field_offset (aty, i);
3372
+ size_t fend = i == nfields - 1 ? total_size : jl_field_offset (aty, i + 1 );
3373
+ size_t fsz = jl_field_size (aty, i);
3374
+ jl_datatype_t *fty = (jl_datatype_t *)jl_field_type (aty, i);
3375
+ if (jl_field_isptr (aty, i) || !fty->layout ->flags .haspadding ) {
3376
+ // The field has no internal padding
3377
+ data_bytes += fsz;
3378
+ if (offset + fsz == fend) {
3379
+ // The field has no padding after. Merge this into the current
3380
+ // comparison range and go to next field.
3381
+ } else {
3382
+ padding_bytes = fend - offset - fsz;
3383
+ // Found padding. Either merge this into the current comparison
3384
+ // range, or emit the old one and start a new one.
3385
+ if (current_desc.data_bytes == data_bytes &&
3386
+ current_desc.padding_bytes == padding_bytes) {
3387
+ // Same as the previous range, just note that down, so we
3388
+ // emit this as a loop.
3389
+ current_desc.nrepeats += 1 ;
3390
+ } else {
3391
+ if (current_desc.nrepeats != 0 )
3392
+ emit_desc (current_desc);
3393
+ current_desc.nrepeats = 1 ;
3394
+ current_desc.data_bytes = data_bytes;
3395
+ current_desc.padding_bytes = padding_bytes;
3396
+ }
3397
+ data_bytes = 0 ;
3398
+ }
3399
+ } else {
3400
+ // The field may have internal padding. Recurse this.
3401
+ data_bytes += emit_masked_bits_compare (emit_desc, fty, current_desc);
3402
+ }
3403
+ }
3404
+ return data_bytes;
3405
+ }
3406
+
3355
3407
static Value *emit_bits_compare (jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
3356
3408
{
3357
3409
++EmittedBitsCompares;
@@ -3390,7 +3442,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
3390
3442
if (at->isAggregateType ()) { // Struct or Array
3391
3443
jl_datatype_t *sty = (jl_datatype_t *)arg1.typ ;
3392
3444
size_t sz = jl_datatype_size (sty);
3393
- if (sz > 512 && !sty->layout ->flags .haspadding ) {
3445
+ if (sz > 512 && !sty->layout ->flags .haspadding && sty-> layout -> flags . isbitsegal ) {
3394
3446
Value *varg1 = arg1.ispointer () ? data_pointer (ctx, arg1) :
3395
3447
value_to_pointer (ctx, arg1).V ;
3396
3448
Value *varg2 = arg2.ispointer () ? data_pointer (ctx, arg2) :
@@ -3427,6 +3479,89 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
3427
3479
}
3428
3480
return ctx.builder .CreateICmpEQ (answer, ConstantInt::get (getInt32Ty (ctx.builder .getContext ()), 0 ));
3429
3481
}
3482
+ else if (sz > 512 && jl_struct_try_layout (sty) && sty->layout ->flags .isbitsegal ) {
3483
+ Type *TInt8 = getInt8Ty (ctx.builder .getContext ());
3484
+ Type *TpInt8 = getInt8PtrTy (ctx.builder .getContext ());
3485
+ Type *TInt1 = getInt1Ty (ctx.builder .getContext ());
3486
+ Value *varg1 = arg1.ispointer () ? data_pointer (ctx, arg1) :
3487
+ value_to_pointer (ctx, arg1).V ;
3488
+ Value *varg2 = arg2.ispointer () ? data_pointer (ctx, arg2) :
3489
+ value_to_pointer (ctx, arg2).V ;
3490
+ varg1 = emit_pointer_from_objref (ctx, varg1);
3491
+ varg2 = emit_pointer_from_objref (ctx, varg2);
3492
+ varg1 = emit_bitcast (ctx, varg1, TpInt8);
3493
+ varg2 = emit_bitcast (ctx, varg2, TpInt8);
3494
+
3495
+ Value *answer = nullptr ;
3496
+ auto emit_desc = [&](egal_desc desc) {
3497
+ Value *ptr1 = varg1;
3498
+ Value *ptr2 = varg2;
3499
+ if (desc.offset != 0 ) {
3500
+ ptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr1, desc.offset );
3501
+ ptr2 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr2, desc.offset );
3502
+ }
3503
+
3504
+ Value *new_ptr1 = ptr1;
3505
+ Value *endptr1 = nullptr ;
3506
+ BasicBlock *postBB = nullptr ;
3507
+ BasicBlock *loopBB = nullptr ;
3508
+ PHINode *answerphi = nullptr ;
3509
+ if (desc.nrepeats != 1 ) {
3510
+ // Set up loop
3511
+ endptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes ));;
3512
+
3513
+ BasicBlock *currBB = ctx.builder .GetInsertBlock ();
3514
+ loopBB = BasicBlock::Create (ctx.builder .getContext (), " egal_loop" , ctx.f );
3515
+ postBB = BasicBlock::Create (ctx.builder .getContext (), " post" , ctx.f );
3516
+ ctx.builder .CreateBr (loopBB);
3517
+
3518
+ ctx.builder .SetInsertPoint (loopBB);
3519
+ answerphi = ctx.builder .CreatePHI (TInt1, 2 );
3520
+ answerphi->addIncoming (answer ? answer : ConstantInt::get (TInt1, 1 ), currBB);
3521
+ answer = answerphi;
3522
+
3523
+ PHINode *itr1 = ctx.builder .CreatePHI (ptr1->getType (), 2 );
3524
+ PHINode *itr2 = ctx.builder .CreatePHI (ptr2->getType (), 2 );
3525
+
3526
+ new_ptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, itr1, desc.data_bytes + desc.padding_bytes );
3527
+ itr1->addIncoming (ptr1, currBB);
3528
+ itr1->addIncoming (new_ptr1, loopBB);
3529
+
3530
+ Value *new_ptr2 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, itr2, desc.data_bytes + desc.padding_bytes );
3531
+ itr2->addIncoming (ptr2, currBB);
3532
+ itr2->addIncoming (new_ptr2, loopBB);
3533
+
3534
+ ptr1 = itr1;
3535
+ ptr2 = itr2;
3536
+ }
3537
+
3538
+ // Emit memcmp. TODO: LLVM has a pass to expand this for additional
3539
+ // performance.
3540
+ Value *this_answer = ctx.builder .CreateCall (prepare_call (memcmp_func),
3541
+ { ptr1,
3542
+ ptr2,
3543
+ ConstantInt::get (ctx.types ().T_size , desc.data_bytes ) });
3544
+ this_answer = ctx.builder .CreateICmpEQ (this_answer, ConstantInt::get (getInt32Ty (ctx.builder .getContext ()), 0 ));
3545
+ answer = answer ? ctx.builder .CreateAnd (answer, this_answer) : this_answer;
3546
+ if (endptr1) {
3547
+ answerphi->addIncoming (answer, loopBB);
3548
+ Value *loopend = ctx.builder .CreateICmpEQ (new_ptr1, endptr1);
3549
+ ctx.builder .CreateCondBr (loopend, postBB, loopBB);
3550
+ ctx.builder .SetInsertPoint (postBB);
3551
+ }
3552
+ };
3553
+ egal_desc current_desc = {0 };
3554
+ size_t trailing_data_bytes = emit_masked_bits_compare (emit_desc, sty, current_desc);
3555
+ assert (current_desc.nrepeats != 0 );
3556
+ emit_desc (current_desc);
3557
+ if (trailing_data_bytes != 0 ) {
3558
+ current_desc.nrepeats = 1 ;
3559
+ current_desc.data_bytes = trailing_data_bytes;
3560
+ current_desc.padding_bytes = 0 ;
3561
+ emit_desc (current_desc);
3562
+ }
3563
+ return answer;
3564
+ }
3430
3565
else {
3431
3566
jl_svec_t *types = sty->types ;
3432
3567
Value *answer = ConstantInt::get (getInt1Ty (ctx.builder .getContext ()), 1 );
0 commit comments