(mis)Optimization
Reads/writes to registers are quite special. I may even dare to say that they are embodiment of side
effects. In the previous example we wrote four different values to the same register. If you didn't
know that address was a register, you may have simplified the logic to just write the final value 1 << (11 + 16)
into the register.
Actually, LLVM, the compiler's backend / optimizer, does not know we are dealing with a register and will merge the writes thus changing the behavior of our program. Let's check that really quick.
$ cargo run --release
(..)
Breakpoint 1, main () at src/07-registers/src/main.rs:9
9 aux7::init();
(gdb) next
25 *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
(gdb) disassemble /m
Dump of assembler code for function main:
7 #[entry]
8 fn main() -> ! {
9 aux7::init();
0x08000188 <+0>: bl 0x800019c <aux7::init>
0x0800018c <+4>: movw r0, #4120 ; 0x1018
0x08000190 <+8>: mov.w r1, #134217728 ; 0x8000000
0x08000194 <+12>: movt r0, #18432 ; 0x4800
10
11 unsafe {
12 // A magic address!
13 const GPIOE_BSRR: u32 = 0x48001018;
14
15 // Turn on the "North" LED (red)
16 *(GPIOE_BSRR as *mut u32) = 1 << 9;
17
18 // Turn on the "East" LED (green)
19 *(GPIOE_BSRR as *mut u32) = 1 << 11;
20
21 // Turn off the "North" LED
22 *(GPIOE_BSRR as *mut u32) = 1 << (9 + 16);
23
24 // Turn off the "East" LED
25 *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
=> 0x08000198 <+16>: str r1, [r0, #0]
26 }
27
28 loop {}
0x0800019a <+18>: b.n 0x800019a <main+18>
End of assembler dump.
The state of the LEDs didn't change this time! The str
instruction is the one that writes a value
to the register. Our debug (unoptimized) program had four of them, one for each write to the
register, but the release (optimized) program only has one.
We can check that using objdump
:
$ # same as cargo objdump -- -d -no-show-raw-insn -print-imm-hex -source target/thumbv7em-none-eabihf/debug/registers
$ cargo objdump --bin registers -- -d -no-show-raw-insn -print-imm-hex -source
registers: file format ELF32-arm-little
Disassembly of section .text:
main:
; #[entry]
8000188: sub sp, #0x18
; aux7::init();
800018a: bl #0xbc
800018e: str r0, [sp, #0x14]
8000190: b #-0x2 <main+0xa>
; *(GPIOE_BSRR as *mut u32) = 1 << 9;
8000192: b #-0x2 <main+0xc>
8000194: movw r0, #0x1018
8000198: movt r0, #0x4800
800019c: mov.w r1, #0x200
80001a0: str r1, [r0]
; *(GPIOE_BSRR as *mut u32) = 1 << 11;
80001a2: b #-0x2 <main+0x1c>
80001a4: movw r0, #0x1018
80001a8: movt r0, #0x4800
80001ac: mov.w r1, #0x800
80001b0: str r1, [r0]
80001b2: movs r0, #0x19
; *(GPIOE_BSRR as *mut u32) = 1 << (9 + 16);
80001b4: mov r1, r0
80001b6: cmp r0, #0x9
80001b8: str r1, [sp, #0x10]
80001ba: bvs #0x54 <main+0x8a>
80001bc: b #-0x2 <main+0x36>
80001be: ldr r0, [sp, #0x10]
80001c0: and r1, r0, #0x1f
80001c4: movs r2, #0x1
80001c6: lsl.w r1, r2, r1
80001ca: lsrs r2, r0, #0x5
80001cc: cmp r2, #0x0
80001ce: str r1, [sp, #0xc]
80001d0: bne #0x4c <main+0x98>
80001d2: b #-0x2 <main+0x4c>
80001d4: movw r0, #0x1018
80001d8: movt r0, #0x4800
80001dc: ldr r1, [sp, #0xc]
80001de: str r1, [r0]
80001e0: movs r0, #0x1b
; *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
80001e2: mov r2, r0
80001e4: cmp r0, #0xb
80001e6: str r2, [sp, #0x8]
80001e8: bvs #0x42 <main+0xa6>
80001ea: b #-0x2 <main+0x64>
80001ec: ldr r0, [sp, #0x8]
80001ee: and r1, r0, #0x1f
80001f2: movs r2, #0x1
80001f4: lsl.w r1, r2, r1
80001f8: lsrs r2, r0, #0x5
80001fa: cmp r2, #0x0
80001fc: str r1, [sp, #0x4]
80001fe: bne #0x3a <main+0xb4>
8000200: b #-0x2 <main+0x7a>
8000202: movw r0, #0x1018
8000206: movt r0, #0x4800
800020a: ldr r1, [sp, #0x4]
800020c: str r1, [r0]
; loop {}
800020e: b #-0x2 <main+0x88>
8000210: b #-0x4 <main+0x88>
; *(GPIOE_BSRR as *mut u32) = 1 << (9 + 16);
8000212: movw r0, #0x41bc
8000216: movt r0, #0x800
800021a: bl #0x3b28
800021e: trap
8000220: movw r0, #0x4204
8000224: movt r0, #0x800
8000228: bl #0x3b1a
800022c: trap
; *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
800022e: movw r0, #0x421c
8000232: movt r0, #0x800
8000236: bl #0x3b0c
800023a: trap
800023c: movw r0, #0x4234
8000240: movt r0, #0x800
8000244: bl #0x3afe
8000248: trap
How do we prevent LLVM from misoptimizing our program? We use volatile operations instead of plain reads/writes:
#![no_main] #![no_std] use core::ptr; #[allow(unused_imports)] use aux7::{entry, iprint, iprintln}; #[entry] fn main() -> ! { aux7::init(); unsafe { // A magic address! const GPIOE_BSRR: u32 = 0x48001018; // Turn on the "North" LED (red) ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << 9); // Turn on the "East" LED (green) ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << 11); // Turn off the "North" LED ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << (9 + 16)); // Turn off the "East" LED ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << (11 + 16)); } loop {} }
If we look at the disassembly of this new program compiled in release mode:
$ cargo objdump --bin registers --release -- -d -no-show-raw-insn -print-imm-hex -source
registers: file format ELF32-arm-little
Disassembly of section .text:
main:
; #[entry]
8000188: bl #0x22
; aux7::init();
800018c: movw r0, #0x1018
8000190: mov.w r1, #0x200
8000194: movt r0, #0x4800
8000198: str r1, [r0]
800019a: mov.w r1, #0x800
800019e: str r1, [r0]
80001a0: mov.w r1, #0x2000000
80001a4: str r1, [r0]
80001a6: mov.w r1, #0x8000000
80001aa: str r1, [r0]
; loop {}
80001ac: b #-0x4 <main+0x24>
We see that the four writes (str
instructions) are preserved. If you run it (use stepi
), you'll
also see that behavior of the program is preserved.