This is just a reminder for myself: an example to refer to when wondering whether I should precalculate the index limit of a for loop.
When the limit is plain arithmetic on variables that are not changed inside the loop, the compiler hoists the computation out of the loop just fine.
As it should.
So relax, and in any moment of doubt, consult this example.
I have a simple example program; its use doesn't matter - what matters is that it contains a for loop with a computed limit which is not constant at compile time.
shell$ cat size.c
#include <stdio.h>
/*
 * Variant with the loop limit written inline in the for condition.
 *
 * Parses argv[1] as `step` and argv[2] as `dims`, then prints corn<<step,
 * one value per line, for corn = 0, 1, ... until corn equals
 * ((size_t)1<<dims)>>step.
 *
 * NOTE(review): atoi is declared in <stdlib.h>, which is not included here;
 * argc is never checked, so both arguments must be supplied by the caller.
 */
int main(int argc, char **argv)
{
int step = atoi(argv[1]);
int dims = atoi(argv[2]);
size_t corn = 0;
/*
 * The limit expression depends only on dims and step, and neither is
 * modified in the loop body, so it has the same value on every iteration.
 * NOTE(review): the shift counts are not range-checked; a negative count or
 * one >= the width of size_t is undefined behavior in C.
 */
for(; corn != ((size_t)1<<dims)>>step; ++corn)
printf("%zu\n", corn<<step);
}
shell$ cat size-man.c
#include <stdio.h>
/*
 * Variant with the loop limit precalculated by hand into a local variable.
 *
 * Parses argv[1] as `step` and argv[2] as `dims`, then prints corn<<step,
 * one value per line, for corn = 0, 1, ... until corn equals `limit`.
 *
 * NOTE(review): atoi is declared in <stdlib.h>, which is not included here;
 * argc is never checked, so both arguments must be supplied by the caller.
 */
int main(int argc, char **argv)
{
int step = atoi(argv[1]);
int dims = atoi(argv[2]);
size_t corn = 0;
/*
 * Manual hoisting: the limit is evaluated once, before the loop starts.
 * NOTE(review): the shift counts are not range-checked; a negative count or
 * one >= the width of size_t is undefined behavior in C.
 */
size_t limit = ((size_t)1<<dims)>>step;
for(; corn != limit; ++corn)
printf("%zu\n", corn<<step);
}
Well, let's view the difference this manual optimization makes in the entire resulting assembly code:
shell$ gcc -O -g -o size{,.c}
shell$ gcc -O -g -o size-man{,.c}
shell$ objdump -d -S size-man > size-man.asm
shell$ objdump -d -S size > size.asm
shell$ diff size.asm size-man.asm
2c2
< size: file format elf32-i386
---
> size-man: file format elf32-i386
147c147
< for(; corn != ((size_t)1<<dims)>>step; ++corn)
---
> size_t limit = ((size_t)1<<dims)>>step;
152a153
> for(; corn != limit; ++corn)
164c165
< 80483f2: 39 f3 cmp %esi,%ebx
---
> 80483f2: 39 de cmp %ebx,%esi
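So the only changes are which source line the instructions are attributed to (a no-op) and the order of the two registers in the cmp instruction - which is irrelevant here, since the result is only tested for equality by the following jne. Nothing won there, really; the manual version just bloats the source with an additional variable declaration. For reference, here is the disassembly of main, first from size and then from size-man: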
08048394 <main>:
#include <stdio.h>
int main(int argc, char **argv)
{
8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx
8048398: 83 e4 f0 and $0xfffffff0,%esp
804839b: ff 71 fc pushl 0xfffffffc(%ecx)
804839e: 55 push %ebp
804839f: 89 e5 mov %esp,%ebp
80483a1: 57 push %edi
80483a2: 56 push %esi
80483a3: 53 push %ebx
80483a4: 51 push %ecx
80483a5: 83 ec 08 sub $0x8,%esp
80483a8: 8b 59 04 mov 0x4(%ecx),%ebx
int step = atoi(argv[1]);
80483ab: 8b 43 04 mov 0x4(%ebx),%eax
80483ae: 89 04 24 mov %eax,(%esp)
80483b1: e8 0e ff ff ff call 80482c4 <atoi@plt>
80483b6: 89 c7 mov %eax,%edi
int dims = atoi(argv[2]);
80483b8: 8b 43 08 mov 0x8(%ebx),%eax
80483bb: 89 04 24 mov %eax,(%esp)
80483be: e8 01 ff ff ff call 80482c4 <atoi@plt>
80483c3: 89 c1 mov %eax,%ecx
size_t corn = 0;
for(; corn != ((size_t)1<<dims)>>step; ++corn)
80483c5: b8 01 00 00 00 mov $0x1,%eax
80483ca: d3 e0 shl %cl,%eax
80483cc: 89 c6 mov %eax,%esi
80483ce: 89 f9 mov %edi,%ecx
80483d0: d3 ee shr %cl,%esi
80483d2: 85 f6 test %esi,%esi
80483d4: 74 20 je 80483f6 <main+0x62>
80483d6: bb 00 00 00 00 mov $0x0,%ebx
printf("%zu\n", corn<<step);
80483db: 89 d8 mov %ebx,%eax
80483dd: 89 f9 mov %edi,%ecx
80483df: d3 e0 shl %cl,%eax
80483e1: 89 44 24 04 mov %eax,0x4(%esp)
80483e5: c7 04 24 c8 84 04 08 movl $0x80484c8,(%esp)
80483ec: e8 c3 fe ff ff call 80482b4 <printf@plt>
80483f1: 43 inc %ebx
80483f2: 39 f3 cmp %esi,%ebx
80483f4: 75 e5 jne 80483db <main+0x47>
}
08048394 <main>:
#include <stdio.h>
int main(int argc, char **argv)
{
8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx
8048398: 83 e4 f0 and $0xfffffff0,%esp
804839b: ff 71 fc pushl 0xfffffffc(%ecx)
804839e: 55 push %ebp
804839f: 89 e5 mov %esp,%ebp
80483a1: 57 push %edi
80483a2: 56 push %esi
80483a3: 53 push %ebx
80483a4: 51 push %ecx
80483a5: 83 ec 08 sub $0x8,%esp
80483a8: 8b 59 04 mov 0x4(%ecx),%ebx
int step = atoi(argv[1]);
80483ab: 8b 43 04 mov 0x4(%ebx),%eax
80483ae: 89 04 24 mov %eax,(%esp)
80483b1: e8 0e ff ff ff call 80482c4 <atoi@plt>
80483b6: 89 c7 mov %eax,%edi
int dims = atoi(argv[2]);
80483b8: 8b 43 08 mov 0x8(%ebx),%eax
80483bb: 89 04 24 mov %eax,(%esp)
80483be: e8 01 ff ff ff call 80482c4 <atoi@plt>
80483c3: 89 c1 mov %eax,%ecx
size_t corn = 0;
size_t limit = ((size_t)1<<dims)>>step;
80483c5: b8 01 00 00 00 mov $0x1,%eax
80483ca: d3 e0 shl %cl,%eax
80483cc: 89 c6 mov %eax,%esi
80483ce: 89 f9 mov %edi,%ecx
80483d0: d3 ee shr %cl,%esi
for(; corn != limit; ++corn)
80483d2: 85 f6 test %esi,%esi
80483d4: 74 20 je 80483f6 <main+0x62>
80483d6: bb 00 00 00 00 mov $0x0,%ebx
printf("%zu\n", corn<<step);
80483db: 89 d8 mov %ebx,%eax
80483dd: 89 f9 mov %edi,%ecx
80483df: d3 e0 shl %cl,%eax
80483e1: 89 44 24 04 mov %eax,0x4(%esp)
80483e5: c7 04 24 c8 84 04 08 movl $0x80484c8,(%esp)
80483ec: e8 c3 fe ff ff call 80482b4 <printf@plt>
80483f1: 43 inc %ebx
80483f2: 39 de cmp %ebx,%esi
80483f4: 75 e5 jne 80483db <main+0x47>
}