Local variables are still free, in Swift

October 14, 2017

By Jeff Johnson

This is part III of my very irregularly scheduled series on compiler optimization. In part II a few (eight) years ago, I explained how the compiler optimizes away local variables in your code. If you haven't read it, TL;DR read it! Apparently the article has achieved cult status, inspiring longtime fans to constantly recite quotations from memory. “An ordinary build spends its time avoiding tense situations. An optimized build spends its time getting into tense situations.”

Over the years, some technical elements of the old article have become outdated. For example, the NSAutoreleasePool class no longer exists. Also, Objective-C no longer exists. Fortunately, the gist of the article remains true today. Consider the following main.swift files. The first does not use local variables:

    import Foundation

    func myProcessName() -> String {
        return ProcessInfo.processInfo.processName.lowercased()
    }

    NSLog("My process name: \(myProcessName())")

And the second does use local variables:

    import Foundation

    func myProcessName() -> String {
        let processInfo = ProcessInfo.processInfo
        let processName = processInfo.processName
        let lowercaseString = processName.lowercased()
        return lowercaseString
    }

    NSLog("My process name: \(myProcessName())")

If you compile with no optimization (SWIFT_OPTIMIZATION_LEVEL = -Onone), here's the disassembly of the first function using no local variables:

__T014LocalVariables13myProcessNameSSyF:
0000000100001510	pushq	%rbp
0000000100001511	movq	%rsp, %rbp
0000000100001514	subq	$0x40, %rsp
0000000100001518	callq	__T0So11ProcessInfoCMa
000000010000151d	cmpq	$0xe, (%rax)
0000000100001521	movq	%rax, %rcx
0000000100001524	movq	%rax, -0x8(%rbp)
0000000100001528	movq	%rcx, -0x10(%rbp)
000000010000152c	jne	0x10000153a
000000010000152e	movq	-0x8(%rbp), %rax
0000000100001532	movq	0x8(%rax), %rcx
0000000100001536	movq	%rcx, -0x10(%rbp)
000000010000153a	movq	-0x10(%rbp), %rax
000000010000153e	movq	0x5b1ce3(%rip), %rsi ## Objc selector ref: processInfo
0000000100001545	movq	%rax, %rdi
0000000100001548	callq	0x100519406 ## Objc message: -[%rdi processInfo]
000000010000154d	movq	%rax, %rdi
0000000100001550	callq	0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001555	movq	0x5b1cd4(%rip), %rsi ## Objc selector ref: processName
000000010000155c	movq	%rax, %rdi
000000010000155f	movq	%rax, -0x18(%rbp)
0000000100001563	callq	0x100519406 ## Objc message: -[%rdi processName]
0000000100001568	movq	%rax, %rdi
000000010000156b	callq	0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001570	movq	%rax, %rdi
0000000100001573	callq	__T0SS10FoundationE36_unconditionallyBridgeFromObjectiveCSSSo8NSStringCSgFZ
0000000100001578	movq	%rax, %rdi
000000010000157b	movq	%rdx, %rsi
000000010000157e	movq	%rcx, %rdx
0000000100001581	movq	%rcx, -0x20(%rbp)
0000000100001585	callq	__T0SS10lowercasedSSyF
000000010000158a	movq	-0x20(%rbp), %rdi
000000010000158e	movq	%rax, -0x28(%rbp)
0000000100001592	movq	%rdx, -0x30(%rbp)
0000000100001596	movq	%rcx, -0x38(%rbp)
000000010000159a	callq	_swift_unknownRelease
000000010000159f	movq	-0x18(%rbp), %rdi
00000001000015a3	callq	0x10051941e ## symbol stub for: _objc_release
00000001000015a8	movq	-0x28(%rbp), %rax
00000001000015ac	movq	-0x30(%rbp), %rdx
00000001000015b0	movq	-0x38(%rbp), %rcx
00000001000015b4	addq	$0x40, %rsp
00000001000015b8	popq	%rbp
00000001000015b9	retq
00000001000015ba	nopw	(%rax,%rax)

And here's the disassembly of the second function using local variables:

__T014LocalVariables13myProcessNameSSyF:
00000001000014f0	pushq	%rbp
00000001000014f1	movq	%rsp, %rbp
00000001000014f4	subq	$0x70, %rsp
00000001000014f8	callq	__T0So11ProcessInfoCMa
00000001000014fd	cmpq	$0xe, (%rax)
0000000100001501	movq	%rax, %rcx
0000000100001504	movq	%rax, -0x38(%rbp)
0000000100001508	movq	%rcx, -0x40(%rbp)
000000010000150c	jne	0x10000151a
000000010000150e	movq	-0x38(%rbp), %rax
0000000100001512	movq	0x8(%rax), %rcx
0000000100001516	movq	%rcx, -0x40(%rbp)
000000010000151a	movq	-0x40(%rbp), %rax
000000010000151e	movq	0x5b1d03(%rip), %rsi ## Objc selector ref: processInfo
0000000100001525	movq	%rax, %rdi
0000000100001528	callq	0x100519406 ## Objc message: -[%rdi processInfo]
000000010000152d	movq	%rax, %rdi
0000000100001530	callq	0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001535	movq	0x5b1cf4(%rip), %rsi ## Objc selector ref: processName
000000010000153c	movq	%rax, %rdi
000000010000153f	movq	%rax, -0x48(%rbp)
0000000100001543	callq	0x100519406 ## Objc message: -[%rdi processName]
0000000100001548	movq	%rax, %rdi
000000010000154b	callq	0x10051942a ## symbol stub for: _objc_retainAutoreleasedReturnValue
0000000100001550	movq	%rax, %rdi
0000000100001553	callq	__T0SS10FoundationE36_unconditionallyBridgeFromObjectiveCSSSo8NSStringCSgFZ
0000000100001558	movq	%rax, -0x18(%rbp)
000000010000155c	movq	%rdx, -0x10(%rbp)
0000000100001560	movq	%rcx, -0x8(%rbp)
0000000100001564	movq	%rax, %rdi
0000000100001567	movq	%rdx, %rsi
000000010000156a	movq	%rcx, %rdx
000000010000156d	movq	%rcx, -0x50(%rbp)
0000000100001571	callq	__T0SS10lowercasedSSyF
0000000100001576	movq	%rax, -0x30(%rbp)
000000010000157a	movq	%rdx, -0x28(%rbp)
000000010000157e	movq	%rcx, -0x20(%rbp)
0000000100001582	movq	-0x50(%rbp), %rdi
0000000100001586	movq	%rax, -0x58(%rbp)
000000010000158a	movq	%rdx, -0x60(%rbp)
000000010000158e	movq	%rcx, -0x68(%rbp)
0000000100001592	callq	_swift_unknownRelease
0000000100001597	movq	-0x48(%rbp), %rdi
000000010000159b	callq	0x10051941e ## symbol stub for: _objc_release
00000001000015a0	movq	-0x48(%rbp), %rax
00000001000015a4	movq	-0x58(%rbp), %rax
00000001000015a8	movq	-0x60(%rbp), %rdx
00000001000015ac	movq	-0x68(%rbp), %rcx
00000001000015b0	addq	$0x70, %rsp
00000001000015b4	popq	%rbp
00000001000015b5	retq
00000001000015b6	nopw	%cs:(%rax,%rax)

The function with local variables has 7 more machine instructions than the function with no local variables: its stack frame grows from 0x40 to 0x70 bytes, and the extra instructions are all movq copies of intermediate values into and out of the additional stack slots. This may sound like significant overhead, but modern processors execute billions of instructions per second, so they can do 7 faster than you can say … anything. The additional work is just moving values back and forth between registers and the stack, which is extremely fast.

If you're still worried, though, compile with optimization (SWIFT_OPTIMIZATION_LEVEL = -Owholemodule). In that case, the functions are inlined into _main instead of getting called by _main. And with either function, _main has identical disassembly, shown below. Try it and see with otool -tV. In other words, local variables are free!

_main:
0000000100001800	pushq	%rbp
0000000100001801	movq	%rsp, %rbp
0000000100001804	pushq	%r15
0000000100001806	pushq	%r14
0000000100001808	pushq	%r13
000000010000180a	pushq	%r12
000000010000180c	pushq	%rbx
000000010000180d	subq	$0x88, %rsp
0000000100001814	movq	__T0s23_ContiguousArrayStorageCySSGML(%rip), %rdi
000000010000181b	testq	%rdi, %rdi
000000010000181e	jne	0x100001836
0000000100001820	leaq	__T0SSN(%rip), %rdi
0000000100001827	callq	__T0s23_ContiguousArrayStorageCMa
000000010000182c	movq	%rax, %rdi
000000010000182f	movq	%rdi, __T0s23_ContiguousArrayStorageCySSGML(%rip)
0000000100001836	leaq	-0xb0(%rbp), %rsi
000000010000183d	callq	_swift_initStackObject
0000000100001842	movq	%rax, %r15
0000000100001845	movaps	0x4eb6e4(%rip), %xmm0
000000010000184c	movups	%xmm0, 0x10(%r15)
0000000100001851	leaq	0x4fe8d8(%rip), %rax ## literal pool for: "My process name: "
0000000100001858	movd	%rax, %xmm0
000000010000185d	movl	$0x11, %eax
0000000100001862	movd	%rax, %xmm1
0000000100001867	punpcklqdq	%xmm1, %xmm0
000000010000186b	movdqa	%xmm0, -0x40(%rbp)
0000000100001870	movq	$0x0, -0x30(%rbp)
0000000100001878	leaq	-0x40(%rbp), %rdi
000000010000187c	callq	__T0s27_toStringReadOnlyStreamableSSxs010TextOutputE0RzlFTfq4g_nSS_Tg5Tf4n_g
0000000100001881	movq	%rcx, %rbx
0000000100001884	movq	%rax, 0x20(%r15)
0000000100001888	movq	%rdx, 0x28(%r15)
000000010000188c	movq	%rbx, 0x30(%r15)
0000000100001890	movq	0x584cb1(%rip), %rdi ## Objc class ref: _OBJC_CLASS_$_NSProcessInfo
0000000100001897	callq	_swift_rt_swift_getInitializedObjCClass
000000010000189c	movq	%rax, %r12
000000010000189f	movq	0x5838c2(%rip), %r13 ## Objc selector ref: processInfo
00000001000018a6	movq	%rbx, %rdi
00000001000018a9	callq	_swift_unknownRetain
00000001000018ae	leaq	__swiftEmptyArrayStorage(%rip), %rdi
00000001000018b5	callq	_swift_rt_swift_retain
00000001000018ba	movq	%r12, %rdi
00000001000018bd	movq	%r13, %rsi
00000001000018c0	callq	0x1004ebfa6 ## Objc message: +[NSProcessInfo processInfo]
00000001000018c5	movq	%rax, %rdi
00000001000018c8	callq	0x1004ebfca ## symbol stub for: _objc_retainAutoreleasedReturnValue
00000001000018cd	movq	%rax, -0x48(%rbp)
00000001000018d1	movq	0x583898(%rip), %rsi ## Objc selector ref: processName
00000001000018d8	movq	%rax, %rdi
00000001000018db	callq	0x1004ebfa6 ## Objc message: -[%rdi processName]
00000001000018e0	movq	%rax, %rdi
00000001000018e3	callq	0x1004ebfca ## symbol stub for: _objc_retainAutoreleasedReturnValue
00000001000018e8	movq	%rax, %rdi
00000001000018eb	callq	__T0SS10FoundationE36_unconditionallyBridgeFromObjectiveCSSSo8NSStringCSgFZ
00000001000018f0	movq	%rcx, %rbx
00000001000018f3	movq	%rax, %rdi
00000001000018f6	movq	%rdx, %rsi
00000001000018f9	movq	%rbx, %rdx
00000001000018fc	callq	__T0SS10lowercasedSSyFTfq4x_n
0000000100001901	movq	%rax, %r13
0000000100001904	movq	%rdx, %r14
0000000100001907	movq	%rcx, %r12
000000010000190a	movq	%rbx, %rdi
000000010000190d	callq	_swift_unknownRelease
0000000100001912	movq	-0x48(%rbp), %rdi
0000000100001916	callq	0x1004ebfbe ## symbol stub for: _objc_release
000000010000191b	movq	%r13, -0x40(%rbp)
000000010000191f	movq	%r14, -0x38(%rbp)
0000000100001923	movq	%r12, -0x30(%rbp)
0000000100001927	leaq	-0x40(%rbp), %rdi
000000010000192b	callq	__T0s27_toStringReadOnlyStreamableSSxs010TextOutputE0RzlFTfq4g_nSS_Tg5Tf4n_g
0000000100001930	movq	%rax, %r13
0000000100001933	movq	%rdx, %r14
0000000100001936	movq	%rcx, %rbx
0000000100001939	movq	%rbx, %rdi
000000010000193c	callq	_swift_unknownRetain
0000000100001941	movq	%r12, %rdi
0000000100001944	callq	_swift_unknownRelease
0000000100001949	movq	%r13, 0x38(%r15)
000000010000194d	movq	%r14, 0x40(%r15)
0000000100001951	movq	%rbx, 0x48(%r15)
0000000100001955	leaq	0x4feb6c(%rip), %rax ## literal pool for: ""
000000010000195c	movq	%rax, -0x40(%rbp)
0000000100001960	movq	$0x0, -0x30(%rbp)
0000000100001968	movq	$0x0, -0x38(%rbp)
0000000100001970	leaq	-0x40(%rbp), %rdi
0000000100001974	callq	__T0s27_toStringReadOnlyStreamableSSxs010TextOutputE0RzlFTfq4g_nSS_Tg5Tf4n_g
0000000100001979	movq	%rax, 0x50(%r15)
000000010000197d	movq	%rdx, 0x58(%r15)
0000000100001981	movq	%rcx, 0x60(%r15)
0000000100001985	movq	%rcx, %rdi
0000000100001988	callq	_swift_unknownRetain
000000010000198d	movq	%r15, %rdi
0000000100001990	callq	__T0S2SSaySSG19stringInterpolationd_tcfCTfq4nd_n
0000000100001995	movq	%rax, %rdi
0000000100001998	movq	%rdx, %rsi
000000010000199b	movq	%rcx, %rdx
000000010000199e	leaq	__swiftEmptyArrayStorage(%rip), %rcx
00000001000019a5	callq	__T010Foundation5NSLogySS_Says7CVarArg_pGdtF
00000001000019aa	xorl	%eax, %eax
00000001000019ac	addq	$0x88, %rsp
00000001000019b3	popq	%rbx
00000001000019b4	popq	%r12
00000001000019b6	popq	%r13
00000001000019b8	popq	%r14
00000001000019ba	popq	%r15
00000001000019bc	popq	%rbp
00000001000019bd	retq
00000001000019be	nop

Swift, on the other hand, is not free. An 8-line main.swift file compiles into an executable over 10 MB in size. That's just the price you pay for new shiny.

The moral of the story is, write your code so that it's easy to debug. Be generous with local variables, and the local variables may return the favor someday. If it ain't broken, think about where you would break it.