Description
Starting from the minimal example below, I've found Swift code that is miscompiled into broken AVR assembly if, and only if, it is built at optimisation level -O1 or higher.
// Minimal reproduction: two top-level Int16 variables and a loop that never exits.
var brightness: Int16 = 0
var fadeAmount: Int16 = 5
// There is no break or return in the body, so control must always come back here.
while(true) {
// `&+` is Swift's overflow addition operator: it wraps around instead of trapping.
brightness = brightness &+ fadeAmount
// Once brightness is outside the open range (0, 255) ...
if (brightness <= 0 || brightness >= 255) {
// ... and the step is still positive, flip it to -5.
// Nothing in this snippet ever makes fadeAmount positive again.
if fadeAmount > 0 {
fadeAmount = -5
}
}
}
This produces the following LLVM IR when run through the swiftc in Xcode 8.3.3 (swiftc version Apple Swift version 3.1 (swiftlang-802.0.53 clang-802.0.42)).
(Note I've amended this LLVM IR with comments and with test labels so it makes a valid test in llvm-lit if placed in test/CodeGen/AVR.)
; RUN: llc -O=1 -mattr=lpm < %s -march=avr | FileCheck %s
; ModuleID = 'il-ordering-bug1-iffy8.ll'
source_filename = "il-ordering-bug1-iffy8.ll"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9"
%Vs5Int16 = type <{ i16 }>
@_Tv4main10brightnessVs5Int16 = hidden global %Vs5Int16 zeroinitializer, align 2
@_Tv4main10fadeAmountVs5Int16 = hidden global %Vs5Int16 zeroinitializer, align 2
@__swift_reflection_version = linkonce_odr hidden constant i16 1
@llvm.used = appending global [1 x i8*] [i8* bitcast (i16* @__swift_reflection_version to i8*)], section "llvm.metadata", align 8
; CHECK-LABEL: main:
; main: initialises the two globals, then loops forever
; (block 26 branches unconditionally back to block 3; there is no ret).
define i32 @main(i32, i8**) #0 {
entry:
; %2 is not used anywhere else in this function
%2 = bitcast i8** %1 to i8*
; CHECK: ldi r24, 0
; CHECK-NEXT: ldi r25, 0
; CHECK-NEXT: sts _Tv4main10brightnessVs5Int16+1, r25
; CHECK-NEXT: sts _Tv4main10brightnessVs5Int16, r24
; brightness = 0, fadeAmount = 5
store i16 0, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10brightnessVs5Int16, i32 0, i32 0), align 2
store i16 5, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10fadeAmountVs5Int16, i32 0, i32 0), align 2
; the first label should occur after the global variables are initialised
; CHECK-LABEL: LBB0_1:
br label %3
; loop header: entered from %entry and from the latch block 26
; <label>:3: ; preds = %26, %entry
br label %4
; <label>:4: ; preds = %3
%5 = load i16, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10brightnessVs5Int16, i32 0, i32 0), align 2
%6 = load i16, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10fadeAmountVs5Int16, i32 0, i32 0), align 2
; the llvm.sadd.with.overflow function takes two int16 values
; and returns a struct with two values, the added value and a bool indicating overflow
%7 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %5, i16 %6)
%8 = extractvalue { i16, i1 } %7, 0 ; final added value
; note: %9 is extracted but never used below
%9 = extractvalue { i16, i1 } %7, 1 ; overflow flag
; then stores it back in brightness
store i16 %8, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10brightnessVs5Int16, i32 0, i32 0), align 2
; if (brightness<=0)
%10 = load i16, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10brightnessVs5Int16, i32 0, i32 0), align 2
%11 = icmp sle i16 %10, 0
; true -> block 12 (left operand of || already true), false -> block 15 (evaluate the right operand)
br i1 %11, label %12, label %15
; <label>:12: ; preds = %4
br label %13
; <label>:13: ; preds = %12, %19
; %14 is true if we got here from <label>:12, otherwise it takes the value of %18 from <label>:19
%14 = phi i1 [ %18, %19 ], [ true, %12 ]
br label %20
; <label>:15: ; preds = %4
br label %16
; <label>:16: ; preds = %15
; %18 = if (brightness>=255)
%17 = load i16, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10brightnessVs5Int16, i32 0, i32 0), align 2
%18 = icmp sge i16 %17, 255
br label %19
; <label>:19: ; preds = %16
br label %13
; <label>:20: ; preds = %13
; if (brightness<=0||brightness>=255)
br i1 %14, label %21, label %26
; <label>:21: ; preds = %20
; if (fadeAmount>0)
%22 = load i16, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10fadeAmountVs5Int16, i32 0, i32 0), align 2
%23 = icmp sgt i16 %22, 0
br i1 %23, label %24, label %25
; <label>:24: ; preds = %21
; fadeAmount = -5
store i16 -5, i16* getelementptr inbounds (%Vs5Int16, %Vs5Int16* @_Tv4main10fadeAmountVs5Int16, i32 0, i32 0), align 2
br label %25
; <label>:25: ; preds = %24, %21
br label %26
; the two checks below expect the jump back to the loop label to be the
; last instruction emitted before the end of the function
; CHECK: rjmp LBB0_1{{$}}
; CHECK-NEXT: .Lfunc_end0
; loop latch: reached straight from block 20 if (brightness>0&&brightness<255),
; or via block 25 after the fadeAmount check
; <label>:26: ; preds = %25, %20
br label %3
}
; Function Attrs: nounwind readnone
declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) #1
attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="core2" "target-features"="+ssse3,+cx16,+fxsr,+mmx,+x87,+sse,+sse2,+sse3" }
attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !9, !10}
!0 = !{i32 1, !"Objective-C Version", i32 2}
!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!3 = !{i32 4, !"Objective-C Garbage Collection", i32 1024}
!4 = !{i32 1, !"Objective-C Class Properties", i32 64}
!5 = !{i32 6, !"Linker Options", !6}
!6 = !{!7, !8}
!7 = !{!"-lswiftCore"}
!8 = !{!"-lobjc"}
!9 = !{i32 1, !"PIC Level", i32 2}
!10 = !{i32 1, !"Swift Version", i32 4}
When compiled with -O=0, this produces very cumbersome but valid assembly:
.text
.macosx_version_min 10, 9
.file "<stdin>"
.globl main
.p2align 1
.type main,@function
main: ; @main
; BB#0: ; %entry
push r28
push r29
in r28, 61
in r29, 62
sbiw r28, 7
in r0, 63
cli
out 62, r29
out 63, r0
out 61, r28
ldi r24, 0
ldi r25, 0
sts _Tv4main10brightnessVs5Int16+1, r25
sts _Tv4main10brightnessVs5Int16, r24
ldi r24, 5
ldi r25, 0
sts _Tv4main10fadeAmountVs5Int16+1, r25
sts _Tv4main10fadeAmountVs5Int16, r24
std Y+6, r20 ; 2-byte Folded Spill
std Y+7, r21 ; 2-byte Folded Spill
rjmp LBB0_1
LBB0_1: ; =>This Inner Loop Header: Depth=1
rjmp LBB0_2
LBB0_2: ; in Loop: Header=BB0_1 Depth=1
lds r24, _Tv4main10brightnessVs5Int16
lds r25, _Tv4main10brightnessVs5Int16+1
lds r18, _Tv4main10fadeAmountVs5Int16
lds r19, _Tv4main10fadeAmountVs5Int16+1
add r24, r18
adc r25, r19
sts _Tv4main10brightnessVs5Int16+1, r25
sts _Tv4main10brightnessVs5Int16, r24
lds r24, _Tv4main10brightnessVs5Int16
lds r25, _Tv4main10brightnessVs5Int16+1
ldi r18, 0
ldi r19, 0
cp r18, r24
cpc r19, r25
brlt LBB0_5
rjmp LBB0_3
LBB0_3: ; in Loop: Header=BB0_1 Depth=1
ldi r24, 1
std Y+5, r24 ; 1-byte Folded Spill
rjmp LBB0_4
LBB0_4: ; in Loop: Header=BB0_1 Depth=1
ldd r24, Y+5 ; 1-byte Folded Reload
std Y+4, r24 ; 1-byte Folded Spill
rjmp LBB0_10
LBB0_5: ; in Loop: Header=BB0_1 Depth=1
rjmp LBB0_6
LBB0_6: ; in Loop: Header=BB0_1 Depth=1
lds r24, _Tv4main10brightnessVs5Int16
lds r25, _Tv4main10brightnessVs5Int16+1
ldi r18, 255
ldi r19, 0
ldi r20, 0
ldi r21, 1
cp r24, r18
cpc r25, r19
std Y+3, r20 ; 1-byte Folded Spill
std Y+2, r21 ; 1-byte Folded Spill
brge LBB0_7
rjmp LBB0_8
LBB0_7: ; in Loop: Header=BB0_1 Depth=1
ldd r24, Y+2 ; 1-byte Folded Reload
std Y+1, r24 ; 1-byte Folded Spill
rjmp LBB0_9
LBB0_8: ; in Loop: Header=BB0_1 Depth=1
ldd r24, Y+3 ; 1-byte Folded Reload
std Y+2, r24 ; 1-byte Folded Spill
rjmp LBB0_7
LBB0_9: ; in Loop: Header=BB0_1 Depth=1
ldd r24, Y+1 ; 1-byte Folded Reload
std Y+5, r24 ; 1-byte Folded Spill
rjmp LBB0_4
LBB0_10: ; in Loop: Header=BB0_1 Depth=1
ldd r24, Y+4 ; 1-byte Folded Reload
andi r24, 1
cpi r24, 0
breq LBB0_14
rjmp LBB0_11
LBB0_11: ; in Loop: Header=BB0_1 Depth=1
lds r24, _Tv4main10fadeAmountVs5Int16
lds r25, _Tv4main10fadeAmountVs5Int16+1
ldi r18, 0
ldi r19, 0
cp r18, r24
cpc r19, r25
brge LBB0_13
rjmp LBB0_12
LBB0_12: ; in Loop: Header=BB0_1 Depth=1
ldi r24, 251
ldi r25, 255
sts _Tv4main10fadeAmountVs5Int16+1, r25
sts _Tv4main10fadeAmountVs5Int16, r24
rjmp LBB0_13
LBB0_13: ; in Loop: Header=BB0_1 Depth=1
rjmp LBB0_14
LBB0_14: ; in Loop: Header=BB0_1 Depth=1
rjmp LBB0_1
.Lfunc_end0:
.size main, .Lfunc_end0-main
.hidden _Tv4main10brightnessVs5Int16 ; @_Tv4main10brightnessVs5Int16
.type _Tv4main10brightnessVs5Int16,@object
.section .bss,"aw",@nobits
.globl _Tv4main10brightnessVs5Int16
.p2align 1
_Tv4main10brightnessVs5Int16:
.zero 2
.size _Tv4main10brightnessVs5Int16, 2
.hidden _Tv4main10fadeAmountVs5Int16 ; @_Tv4main10fadeAmountVs5Int16
.type _Tv4main10fadeAmountVs5Int16,@object
.globl _Tv4main10fadeAmountVs5Int16
.p2align 1
_Tv4main10fadeAmountVs5Int16:
.zero 2
.size _Tv4main10fadeAmountVs5Int16, 2
.hidden __swift_reflection_version ; @__swift_reflection_version
.type __swift_reflection_version,@object
.section .rodata,"a",@progbits
.weak __swift_reflection_version
__swift_reflection_version:
.short 1 ; 0x1
.size __swift_reflection_version, 2
; Declaring this symbol tells the CRT that it should
;copy all variables from program memory to RAM on startup
.globl __do_copy_data
; Declaring this symbol tells the CRT that it should
;clear the zeroed data section on startup
.globl __do_clear_bss
When compiled with -O=1 we get the assembly below, which is much more efficient but doesn't work. In particular, the while (true) {} loop is effectively broken.
The rjmp back to LBB0_1 should (probably?) be the last instruction in main. Instead it is followed by more labels (LBB0_5 to LBB0_7), and none of them branches back into the loop: once brlt LBB0_5 is taken, execution simply falls through LBB0_7 and off the end of the function. It looks like some block reordering went wrong during optimisation?
.text
.macosx_version_min 10, 9
.file "<stdin>"
.globl main
.p2align 1
.type main,@function
main: ; @main
; BB#0: ; %entry
ldi r24, 0
ldi r25, 0
sts _Tv4main10brightnessVs5Int16+1, r25
sts _Tv4main10brightnessVs5Int16, r24
ldi r18, 5
ldi r19, 0
sts _Tv4main10fadeAmountVs5Int16+1, r19
sts _Tv4main10fadeAmountVs5Int16, r18
ldi r18, 251
ldi r19, 255
ldi r20, 255
ldi r21, 0
rjmp LBB0_2
LBB0_1: ; in Loop: Header=BB0_2 Depth=1
sts _Tv4main10fadeAmountVs5Int16+1, r19
sts _Tv4main10fadeAmountVs5Int16, r18
LBB0_2: ; =>This Inner Loop Header: Depth=1
lds r22, _Tv4main10fadeAmountVs5Int16
lds r23, _Tv4main10fadeAmountVs5Int16+1
lds r30, _Tv4main10brightnessVs5Int16
lds r31, _Tv4main10brightnessVs5Int16+1
add r30, r22
adc r31, r23
sts _Tv4main10brightnessVs5Int16+1, r31
sts _Tv4main10brightnessVs5Int16, r30
ldi r22, 1
cp r24, r30
cpc r25, r31
brlt LBB0_5
; BB#3: ; in Loop: Header=BB0_2 Depth=1
andi r22, 1
cpi r22, 0
breq LBB0_2
; BB#4: ; in Loop: Header=BB0_2 Depth=1
lds r22, _Tv4main10fadeAmountVs5Int16
lds r23, _Tv4main10fadeAmountVs5Int16+1
cp r24, r22
cpc r25, r23
brge LBB0_2
rjmp LBB0_1
LBB0_5:
lds r30, _Tv4main10brightnessVs5Int16
lds r31, _Tv4main10brightnessVs5Int16+1
cp r30, r20
cpc r31, r21
brge LBB0_7
; BB#6:
ldi r22, 0
LBB0_7:
.Lfunc_end0:
.size main, .Lfunc_end0-main
.hidden _Tv4main10brightnessVs5Int16 ; @_Tv4main10brightnessVs5Int16
.type _Tv4main10brightnessVs5Int16,@object
.section .bss,"aw",@nobits
.globl _Tv4main10brightnessVs5Int16
.p2align 1
_Tv4main10brightnessVs5Int16:
.zero 2
.size _Tv4main10brightnessVs5Int16, 2
.hidden _Tv4main10fadeAmountVs5Int16 ; @_Tv4main10fadeAmountVs5Int16
.type _Tv4main10fadeAmountVs5Int16,@object
.globl _Tv4main10fadeAmountVs5Int16
.p2align 1
_Tv4main10fadeAmountVs5Int16:
.zero 2
.size _Tv4main10fadeAmountVs5Int16, 2
.hidden __swift_reflection_version ; @__swift_reflection_version
.type __swift_reflection_version,@object
.section .rodata,"a",@progbits
.weak __swift_reflection_version
__swift_reflection_version:
.short 1 ; 0x1
.size __swift_reflection_version, 2
; Declaring this symbol tells the CRT that it should
;copy all variables from program memory to RAM on startup
.globl __do_copy_data
; Declaring this symbol tells the CRT that it should
;clear the zeroed data section on startup
.globl __do_clear_bss
I want to use this script with bugpoint to reduce the test case but I'm new to this so it will take a while to get my head round the tool.
#!/bin/sh
# Compile one test file for AVR at -O1 and verify the output with FileCheck.
#
# Usage: <this script> FILE
#   FILE is fed to llc on stdin and is also the file FileCheck reads its
#   check lines from.
#
# Exit status: that of FileCheck, the last command in the pipeline
# (0 when every check matches, non-zero otherwise).
#
# NOTE(review): if bugpoint hands this script a bitcode file instead of the
# textual .ll, the check comments will not be present in it - confirm which
# file FileCheck should be pointed at before relying on this for reduction.

# The tool directory is relative, so run this from the directory that
# contains build/.
PATH="build/llvm/bin:$PATH"

# Use "$1" rather than "$@": a redirection target must be exactly one word,
# and FileCheck takes a single check file. "$@" is only correct when exactly
# one argument is passed.
llc -O=1 -mattr=lpm -march=avr < "$1" | FileCheck "$1"