This is part III of my very irregularly scheduled series on compiler optimization. In part II a few (eight) years ago, I explained how the compiler optimizes away local variables in your code. If you haven't read it, TL;DR read it! Apparently the article has achieved cult status, inspiring longtime fans to constantly recite quotations from memory. “An ordinary build spends its time avoiding tense situations. An optimized build spends its time getting into tense situations.”
Over the years, some technical elements of the old article have become outdated. For example, the NSAutoreleasePool class no longer exists. Also, Objective-C no longer exists. Fortunately, the gist of the article remains true today. Consider the following main.swift files. The first does not use local variables:
import Foundation
func myProcessName() -> String { // Returns the current process's name, lowercased.
return ProcessInfo.processInfo.processName.lowercased() // One chained expression; no intermediate local variables.
}
NSLog("My process name: \(myProcessName())") // Logs the returned name, interpolated into the message.
And the second does use local variables:
import Foundation
func myProcessName() -> String { // Returns the current process's name, lowercased.
let processInfo = ProcessInfo.processInfo // Process-info object for the running process.
let processName = processInfo.processName // Name of the process.
let lowercaseString = processName.lowercased() // Lowercased copy of the name.
return lowercaseString // Each intermediate value above is named so it can be inspected in a debugger.
}
NSLog("My process name: \(myProcessName())") // Logs the returned name, interpolated into the message.
If you compile with no optimization (SWIFT_OPTIMIZATION_LEVEL = -Onone), here's the disassembly of the first function using no local variables:
__T014LocalVariables13myProcessNameSSyF:
0000000100001510 pushq %rbp
0000000100001511 movq %rsp, %rbp
0000000100001514 subq $0x40, %rsp
0000000100001518 callq __T0So11ProcessInfoCMa
000000010000151d cmpq $0xe, (%rax)
0000000100001521 movq %rax, %rcx
0000000100001524 movq %rax, -0x8(%rbp)
0000000100001528 movq %rcx, -0x10(%rbp)
000000010000152c jne 0x10000153a
000000010000152e movq -0x8(%rbp), %rax
0000000100001532 movq 0x8(%rax), %rcx
0000000100001536 movq %rcx, -0x10(%rbp)
000000010000153a movq -0x10(%rbp), %rax
000000010000153e movq 0x5b1ce3(%rip), %rsi ## Objc selector ref: processInfo
0000000100001545 movq %rax, %rdi
0000000100001548 callq 0x100519406 ## Objc message: -[%rdi processInfo]
000000010000154d movq %rax, %rdi
0000000100001550 callq 0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001555 movq 0x5b1cd4(%rip), %rsi ## Objc selector ref: processName
000000010000155c movq %rax, %rdi
000000010000155f movq %rax, -0x18(%rbp)
0000000100001563 callq 0x100519406 ## Objc message: -[%rdi processName]
0000000100001568 movq %rax, %rdi
000000010000156b callq 0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001570 movq %rax, %rdi
0000000100001573 callq __T0SS10FoundationE36_unconditionallyBridgeFromObjectiveCSSSo8NSStringCSgFZ
0000000100001578 movq %rax, %rdi
000000010000157b movq %rdx, %rsi
000000010000157e movq %rcx, %rdx
0000000100001581 movq %rcx, -0x20(%rbp)
0000000100001585 callq __T0SS10lowercasedSSyF
000000010000158a movq -0x20(%rbp), %rdi
000000010000158e movq %rax, -0x28(%rbp)
0000000100001592 movq %rdx, -0x30(%rbp)
0000000100001596 movq %rcx, -0x38(%rbp)
000000010000159a callq _swift_unknownRelease
000000010000159f movq -0x18(%rbp), %rdi
00000001000015a3 callq 0x10051941e ## symbol stub for: _objc_release
00000001000015a8 movq -0x28(%rbp), %rax
00000001000015ac movq -0x30(%rbp), %rdx
00000001000015b0 movq -0x38(%rbp), %rcx
00000001000015b4 addq $0x40, %rsp
00000001000015b8 popq %rbp
00000001000015b9 retq
00000001000015ba nopw (%rax,%rax)
And here's the disassembly of the second function using local variables:
__T014LocalVariables13myProcessNameSSyF:
00000001000014f0 pushq %rbp
00000001000014f1 movq %rsp, %rbp
00000001000014f4 subq $0x70, %rsp
00000001000014f8 callq __T0So11ProcessInfoCMa
00000001000014fd cmpq $0xe, (%rax)
0000000100001501 movq %rax, %rcx
0000000100001504 movq %rax, -0x38(%rbp)
0000000100001508 movq %rcx, -0x40(%rbp)
000000010000150c jne 0x10000151a
000000010000150e movq -0x38(%rbp), %rax
0000000100001512 movq 0x8(%rax), %rcx
0000000100001516 movq %rcx, -0x40(%rbp)
000000010000151a movq -0x40(%rbp), %rax
000000010000151e movq 0x5b1d03(%rip), %rsi ## Objc selector ref: processInfo
0000000100001525 movq %rax, %rdi
0000000100001528 callq 0x100519406 ## Objc message: -[%rdi processInfo]
000000010000152d movq %rax, %rdi
0000000100001530 callq 0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001535 movq 0x5b1cf4(%rip), %rsi ## Objc selector ref: processName
000000010000153c movq %rax, %rdi
000000010000153f movq %rax, -0x48(%rbp)
0000000100001543 callq 0x100519406 ## Objc message: -[%rdi processName]
0000000100001548 movq %rax, %rdi
000000010000154b callq 0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001550 movq %rax, %rdi
0000000100001553 callq __T0SS10FoundationE36_unconditionallyBridgeFromObjectiveCSSSo8NSStringCSgFZ
0000000100001558 movq %rax, -0x18(%rbp)
000000010000155c movq %rdx, -0x10(%rbp)
0000000100001560 movq %rcx, -0x8(%rbp)
0000000100001564 movq %rax, %rdi
0000000100001567 movq %rdx, %rsi
000000010000156a movq %rcx, %rdx
000000010000156d movq %rcx, -0x50(%rbp)
0000000100001571 callq __T0SS10lowercasedSSyF
0000000100001576 movq %rax, -0x30(%rbp)
000000010000157a movq %rdx, -0x28(%rbp)
000000010000157e movq %rcx, -0x20(%rbp)
0000000100001582 movq -0x50(%rbp), %rdi
0000000100001586 movq %rax, -0x58(%rbp)
000000010000158a movq %rdx, -0x60(%rbp)
000000010000158e movq %rcx, -0x68(%rbp)
0000000100001592 callq _swift_unknownRelease
0000000100001597 movq -0x48(%rbp), %rdi
000000010000159b callq 0x10051941e ## symbol stub for: _objc_release
00000001000015a0 movq -0x48(%rbp), %rax
00000001000015a4 movq -0x58(%rbp), %rax
00000001000015a8 movq -0x60(%rbp), %rdx
00000001000015ac movq -0x68(%rbp), %rcx
00000001000015b0 addq $0x70, %rsp
00000001000015b4 popq %rbp
00000001000015b5 retq
00000001000015b6 nopw %cs:(%rax,%rax)
The function with local variables has 7 more machine instructions than the function with no local variables. This may sound like significant overhead, but modern processors execute billions of instructions per second, so they can do 7 faster than you can say … anything. The additional work is just moving values back and forth between registers and the stack, which is extremely fast.
If you're still worried, though, compile with optimization (SWIFT_OPTIMIZATION_LEVEL = -Owholemodule). In that case, the functions are inlined into _main instead of getting called by _main. And with either function, _main has identical disassembly. Try it and see with otool -tV. In other words, local variables are free!
_main:
0000000100001800 pushq %rbp
0000000100001801 movq %rsp, %rbp
0000000100001804 pushq %r15
0000000100001806 pushq %r14
0000000100001808 pushq %r13
000000010000180a pushq %r12
000000010000180c pushq %rbx
000000010000180d subq $0x88, %rsp
0000000100001814 movq __T0s23_ContiguousArrayStorageCySSGML(%rip), %rdi
000000010000181b testq %rdi, %rdi
000000010000181e jne 0x100001836
0000000100001820 leaq __T0SSN(%rip), %rdi
0000000100001827 callq __T0s23_ContiguousArrayStorageCMa
000000010000182c movq %rax, %rdi
000000010000182f movq %rdi, __T0s23_ContiguousArrayStorageCySSGML(%rip)
0000000100001836 leaq -0xb0(%rbp), %rsi
000000010000183d callq _swift_initStackObject
0000000100001842 movq %rax, %r15
0000000100001845 movaps 0x4eb6e4(%rip), %xmm0
000000010000184c movups %xmm0, 0x10(%r15)
0000000100001851 leaq 0x4fe8d8(%rip), %rax ## literal pool for: "My process name: "
0000000100001858 movd %rax, %xmm0
000000010000185d movl $0x11, %eax
0000000100001862 movd %rax, %xmm1
0000000100001867 punpcklqdq %xmm1, %xmm0
000000010000186b movdqa %xmm0, -0x40(%rbp)
0000000100001870 movq $0x0, -0x30(%rbp)
0000000100001878 leaq -0x40(%rbp), %rdi
000000010000187c callq __T0s27_toStringReadOnlyStreamableSSxs010TextOutputE0RzlFTfq4g_nSS_Tg5Tf4n_g
0000000100001881 movq %rcx, %rbx
0000000100001884 movq %rax, 0x20(%r15)
0000000100001888 movq %rdx, 0x28(%r15)
000000010000188c movq %rbx, 0x30(%r15)
0000000100001890 movq 0x584cb1(%rip), %rdi ## Objc class ref: _OBJC_CLASS_$_NSProcessInfo
0000000100001897 callq _swift_rt_swift_getInitializedObjCClass
000000010000189c movq %rax, %r12
000000010000189f movq 0x5838c2(%rip), %r13 ## Objc selector ref: processInfo
00000001000018a6 movq %rbx, %rdi
00000001000018a9 callq _swift_unknownRetain
00000001000018ae leaq __swiftEmptyArrayStorage(%rip), %rdi
00000001000018b5 callq _swift_rt_swift_retain
00000001000018ba movq %r12, %rdi
00000001000018bd movq %r13, %rsi
00000001000018c0 callq 0x1004ebfa6 ## Objc message: +[NSProcessInfo processInfo]
00000001000018c5 movq %rax, %rdi
00000001000018c8 callq 0x1004ebfca ## symbol stub for: _objc_retainAutoreleasedReturnValue
00000001000018cd movq %rax, -0x48(%rbp)
00000001000018d1 movq 0x583898(%rip), %rsi ## Objc selector ref: processName
00000001000018d8 movq %rax, %rdi
00000001000018db callq 0x1004ebfa6 ## Objc message: -[%rdi processName]
00000001000018e0 movq %rax, %rdi
00000001000018e3 callq 0x1004ebfca ## symbol stub for: _objc_retainAutoreleasedReturnValue
00000001000018e8 movq %rax, %rdi
00000001000018eb callq __T0SS10FoundationE36_unconditionallyBridgeFromObjectiveCSSSo8NSStringCSgFZ
00000001000018f0 movq %rcx, %rbx
00000001000018f3 movq %rax, %rdi
00000001000018f6 movq %rdx, %rsi
00000001000018f9 movq %rbx, %rdx
00000001000018fc callq __T0SS10lowercasedSSyFTfq4x_n
0000000100001901 movq %rax, %r13
0000000100001904 movq %rdx, %r14
0000000100001907 movq %rcx, %r12
000000010000190a movq %rbx, %rdi
000000010000190d callq _swift_unknownRelease
0000000100001912 movq -0x48(%rbp), %rdi
0000000100001916 callq 0x1004ebfbe ## symbol stub for: _objc_release
000000010000191b movq %r13, -0x40(%rbp)
000000010000191f movq %r14, -0x38(%rbp)
0000000100001923 movq %r12, -0x30(%rbp)
0000000100001927 leaq -0x40(%rbp), %rdi
000000010000192b callq __T0s27_toStringReadOnlyStreamableSSxs010TextOutputE0RzlFTfq4g_nSS_Tg5Tf4n_g
0000000100001930 movq %rax, %r13
0000000100001933 movq %rdx, %r14
0000000100001936 movq %rcx, %rbx
0000000100001939 movq %rbx, %rdi
000000010000193c callq _swift_unknownRetain
0000000100001941 movq %r12, %rdi
0000000100001944 callq _swift_unknownRelease
0000000100001949 movq %r13, 0x38(%r15)
000000010000194d movq %r14, 0x40(%r15)
0000000100001951 movq %rbx, 0x48(%r15)
0000000100001955 leaq 0x4feb6c(%rip), %rax ## literal pool for: ""
000000010000195c movq %rax, -0x40(%rbp)
0000000100001960 movq $0x0, -0x30(%rbp)
0000000100001968 movq $0x0, -0x38(%rbp)
0000000100001970 leaq -0x40(%rbp), %rdi
0000000100001974 callq __T0s27_toStringReadOnlyStreamableSSxs010TextOutputE0RzlFTfq4g_nSS_Tg5Tf4n_g
0000000100001979 movq %rax, 0x50(%r15)
000000010000197d movq %rdx, 0x58(%r15)
0000000100001981 movq %rcx, 0x60(%r15)
0000000100001985 movq %rcx, %rdi
0000000100001988 callq _swift_unknownRetain
000000010000198d movq %r15, %rdi
0000000100001990 callq __T0S2SSaySSG19stringInterpolationd_tcfCTfq4nd_n
0000000100001995 movq %rax, %rdi
0000000100001998 movq %rdx, %rsi
000000010000199b movq %rcx, %rdx
000000010000199e leaq __swiftEmptyArrayStorage(%rip), %rcx
00000001000019a5 callq __T010Foundation5NSLogySS_Says7CVarArg_pGdtF
00000001000019aa xorl %eax, %eax
00000001000019ac addq $0x88, %rsp
00000001000019b3 popq %rbx
00000001000019b4 popq %r12
00000001000019b6 popq %r13
00000001000019b8 popq %r14
00000001000019ba popq %r15
00000001000019bc popq %rbp
00000001000019bd retq
00000001000019be nop
Swift, on the other hand, is not free. An 8-line main.swift file compiles into an executable over 10 MB in size. That's just the price you pay for new shiny.
The moral of the story is, write your code so that it’s easy to debug. Be generous with local variables, and the local variables may return the favor someday. If it ain't broken, think about where you would break it.