This is just a reminder for me, an example to refer to when wondering if I should precalculate the index limit of a for loop or not.
When the limit is plain arithmetic on known variables that are not changed in the loop, the compiler hoists the computation out of the loop just fine.
As it should.
So, relax and in any moment of doubt, consult this example.
I have a simple example program; its use doesn't matter - what matters is that it contains a for loop with a computed limit which is not constant at compile time.
shell$ cat size.c
#include <stdio.h>

int main(int argc, char **argv)
{
	int step = atoi(argv[1]);
	int dims = atoi(argv[2]);
	size_t corn = 0;
	for(; corn != ((size_t)1<<dims)>>step; ++corn)
	printf("%zu\n", corn<<step);
}
shell$ cat size-man.c
#include <stdio.h>

int main(int argc, char **argv)
{
	int step = atoi(argv[1]);
	int dims = atoi(argv[2]);
	size_t corn = 0;
	size_t limit = ((size_t)1<<dims)>>step;
	for(; corn != limit; ++corn)
	printf("%zu\n", corn<<step);
}
Well, let's view the difference this manual optimization makes in the entire resulting assembly code:
shell$ gcc -O -g -o size{,.c}
shell$ gcc -O -g -o size-man{,.c}
shell$ objdump -d -S size-man > size-man.asm
shell$ objdump -d -S size > size.asm
shell$ diff size.asm size-man.asm
2c2
< size: file format elf32-i386
---
> size-man: file format elf32-i386
147c147
< for(; corn != ((size_t)1<<dims)>>step; ++corn)
---
> size_t limit = ((size_t)1<<dims)>>step;
152a153
> for(; corn != limit; ++corn)
164c165
< 80483f2: 39 f3 cmp %esi,%ebx
---
> 80483f2: 39 de cmp %ebx,%esi
So we only change the association of source code lines (a no-op) and the order of the two registers in the cmp instruction (which only checks for equality!). Nothing won there, really; we just bloated the code with an additional variable declaration.
08048394 <main>: #include <stdio.h> int main(int argc, char **argv) { 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx 8048398: 83 e4 f0 and $0xfffffff0,%esp 804839b: ff 71 fc pushl 0xfffffffc(%ecx) 804839e: 55 push %ebp 804839f: 89 e5 mov %esp,%ebp 80483a1: 57 push %edi 80483a2: 56 push %esi 80483a3: 53 push %ebx 80483a4: 51 push %ecx 80483a5: 83 ec 08 sub $0x8,%esp 80483a8: 8b 59 04 mov 0x4(%ecx),%ebx int step = atoi(argv[1]); 80483ab: 8b 43 04 mov 0x4(%ebx),%eax 80483ae: 89 04 24 mov %eax,(%esp) 80483b1: e8 0e ff ff ff call 80482c4 <atoi@plt> 80483b6: 89 c7 mov %eax,%edi int dims = atoi(argv[2]); 80483b8: 8b 43 08 mov 0x8(%ebx),%eax 80483bb: 89 04 24 mov %eax,(%esp) 80483be: e8 01 ff ff ff call 80482c4 <atoi@plt> 80483c3: 89 c1 mov %eax,%ecx size_t corn = 0; for(; corn != ((size_t)1<<dims)>>step; ++corn) 80483c5: b8 01 00 00 00 mov $0x1,%eax 80483ca: d3 e0 shl %cl,%eax 80483cc: 89 c6 mov %eax,%esi 80483ce: 89 f9 mov %edi,%ecx 80483d0: d3 ee shr %cl,%esi 80483d2: 85 f6 test %esi,%esi 80483d4: 74 20 je 80483f6 <main+0x62> 80483d6: bb 00 00 00 00 mov $0x0,%ebx printf("%zu\n", corn<<step); 80483db: 89 d8 mov %ebx,%eax 80483dd: 89 f9 mov %edi,%ecx 80483df: d3 e0 shl %cl,%eax 80483e1: 89 44 24 04 mov %eax,0x4(%esp) 80483e5: c7 04 24 c8 84 04 08 movl $0x80484c8,(%esp) 80483ec: e8 c3 fe ff ff call 80482b4 <printf@plt> 80483f1: 43 inc %ebx 80483f2: 39 f3 cmp %esi,%ebx 80483f4: 75 e5 jne 80483db <main+0x47> }
08048394 <main>: #include <stdio.h> int main(int argc, char **argv) { 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx 8048398: 83 e4 f0 and $0xfffffff0,%esp 804839b: ff 71 fc pushl 0xfffffffc(%ecx) 804839e: 55 push %ebp 804839f: 89 e5 mov %esp,%ebp 80483a1: 57 push %edi 80483a2: 56 push %esi 80483a3: 53 push %ebx 80483a4: 51 push %ecx 80483a5: 83 ec 08 sub $0x8,%esp 80483a8: 8b 59 04 mov 0x4(%ecx),%ebx int step = atoi(argv[1]); 80483ab: 8b 43 04 mov 0x4(%ebx),%eax 80483ae: 89 04 24 mov %eax,(%esp) 80483b1: e8 0e ff ff ff call 80482c4 <atoi@plt> 80483b6: 89 c7 mov %eax,%edi int dims = atoi(argv[2]); 80483b8: 8b 43 08 mov 0x8(%ebx),%eax 80483bb: 89 04 24 mov %eax,(%esp) 80483be: e8 01 ff ff ff call 80482c4 <atoi@plt> 80483c3: 89 c1 mov %eax,%ecx size_t corn = 0; size_t limit = ((size_t)1<<dims)>>step; 80483c5: b8 01 00 00 00 mov $0x1,%eax 80483ca: d3 e0 shl %cl,%eax 80483cc: 89 c6 mov %eax,%esi 80483ce: 89 f9 mov %edi,%ecx 80483d0: d3 ee shr %cl,%esi for(; corn != limit; ++corn) 80483d2: 85 f6 test %esi,%esi 80483d4: 74 20 je 80483f6 <main+0x62> 80483d6: bb 00 00 00 00 mov $0x0,%ebx printf("%zu\n", corn<<step); 80483db: 89 d8 mov %ebx,%eax 80483dd: 89 f9 mov %edi,%ecx 80483df: d3 e0 shl %cl,%eax 80483e1: 89 44 24 04 mov %eax,0x4(%esp) 80483e5: c7 04 24 c8 84 04 08 movl $0x80484c8,(%esp) 80483ec: e8 c3 fe ff ff call 80482b4 <printf@plt> 80483f1: 43 inc %ebx 80483f2: 39 de cmp %ebx,%esi 80483f4: 75 e5 jne 80483db <main+0x47> }