One small point that often gets overlooked in writing C/C++ programs is the difference between pointers and arrays.
Syntactically, pointers and arrays are very similar and, in the (very common) case of function parameters,
they are actually defined to be identical in the C99 standard.
So it is not surprising that we don't often consider the difference between pointers and arrays. However, in the spirit of unnecessary (and probably useless) optimization, keeping the difference in mind is a good thing.
For example:
int main (int argc, char **argv)
and
int main (int argc, char *argv[])
are identical but
pva-arr.c
/* Returns the character at index 7 ('o') of the string, read from a local ARRAY.
   The array is an automatic object initialized with a copy of the literal,
   so the element is addressed directly, with no pointer to load first. */
char foo ()
{
    char arr[] = "hello world!"; /* Using ARRAY */
    return arr [7]; /* index the array itself (the original referenced an undeclared 'ptr') */
}
and
pva-ptr.c
/* Returns the character at index 7 ('o') of the string, read through a POINTER. */
char foo ()
{
char *ptr = "hello world!"; /* Using POINTER: ptr holds the address of the string literal */
return ptr [7]; /* the value of ptr has to be loaded before the character can be read */
}
are not!
We can easily see this from the disassembly - both GNU's gcc and Microsoft's cl generate different code for them, like so:
pva-gcc.out
;(pva-arr gcc 3.4.4 output) |;(pva-ptr gcc 3.4.4 output)
+
.file "dpta.c" | .file "dpta.c"
.section .rdata,"dr" | .section .rdata,"dr"
LC0: |LC0:
.ascii "hello world!\0" | .ascii "hello world!\0"
.text | .text
.globl _foo |.globl _foo
.def _foo; .scl 2; .type 32; .endef| .def _foo; .scl 2; .type 32; .endef
_foo: |_foo:
pushl %ebp | pushl %ebp
movl %esp, %ebp | movl %esp, %ebp
subl $40, %esp | subl $40, %esp
movl $LC0, -12(%ebp) | movl $LC0, -12(%ebp)
movl LC0, %eax | movl LC0, %eax
movl %eax, -40(%ebp) | movl %eax, -40(%ebp)
movl LC0+4, %eax | movl LC0+4, %eax
movl %eax, -36(%ebp) | movl %eax, -36(%ebp)
movl LC0+8, %eax | movl LC0+8, %eax
movl %eax, -32(%ebp) | movl %eax, -32(%ebp)
movzbl LC0+12, %eax | movzbl LC0+12, %eax
movb %al, -28(%ebp) | movb %al, -28(%ebp)
|
|
| movl -12(%ebp), %eax
;EXTRA INSTRUCTIONS =============>> | addl $7, %eax
|
|
movsbl -33(%ebp),%eax | movsbl (%eax),%eax
leave | leave
ret | ret
pva-msvc.out
;(pva-arr.c cl 13.10.3077 output) |;(pva-ptr.c cl 13.10.3077 output)
+
TITLE dpta.c | TITLE dpta.c
.386P | .386P
include listing.inc |include listing.inc
if @Version gt 510 |if @Version gt 510
.model FLAT |.model FLAT
else |else
_TEXT SEGMENT PARA USE32 PUBLIC 'CODE' |_TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT ENDS |_TEXT ENDS
_DATA SEGMENT DWORD USE32 PUBLIC 'DATA' |_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA ENDS |_DATA ENDS
CONST SEGMENT DWORD USE32 PUBLIC 'CONST'|CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST ENDS |CONST ENDS
_BSS SEGMENT DWORD USE32 PUBLIC 'BSS' |_BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS ENDS |_BSS ENDS
$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM' |$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS ENDS |$$SYMBOLS ENDS
_TLS SEGMENT DWORD USE32 PUBLIC 'TLS' |_TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS ENDS |_TLS ENDS
FLAT GROUP _DATA, CONST, _BSS |FLAT GROUP _DATA, CONST, _BSS
ASSUME CS: FLAT, DS: FLAT, SS: FLAT | ASSUME CS: FLAT, DS: FLAT, SS: FLAT
endif |endif
|
INCLUDELIB LIBC |INCLUDELIB LIBC
INCLUDELIB OLDNAMES |INCLUDELIB OLDNAMES
|
_DATA SEGMENT |_DATA SEGMENT
$SG471 DB 'hello world!', 00H |$SG471 DB 'hello world!', 00H
ORG $+3 | ORG $+3
$SG473 DB 'hello world!', 00H |$SG473 DB 'hello world!', 00H
_DATA ENDS |_DATA ENDS
PUBLIC _foo |PUBLIC _foo
_TEXT SEGMENT |_TEXT SEGMENT
_ptr$ = -20 |_ptr$ = -20
_arr$ = -16 |_arr$ = -16
_foo PROC NEAR |_foo PROC NEAR
push ebp | push ebp
mov ebp, esp | mov ebp, esp
sub esp, 20 | sub esp, 20
mov DWORD PTR _ptr$[ebp], OFFSET FLAT:$SG471 | mov DWORD PTR _ptr$[ebp], OFFSET FLAT:$SG471
mov eax, DWORD PTR $SG473 | mov eax, DWORD PTR $SG473
mov DWORD PTR _arr$[ebp], eax | mov DWORD PTR _arr$[ebp], eax
mov ecx, DWORD PTR $SG473+4 | mov ecx, DWORD PTR $SG473+4
mov DWORD PTR _arr$[ebp+4], ecx | mov DWORD PTR _arr$[ebp+4], ecx
mov edx, DWORD PTR $SG473+8 | mov edx, DWORD PTR $SG473+8
mov DWORD PTR _arr$[ebp+8], edx | mov DWORD PTR _arr$[ebp+8], edx
mov al, BYTE PTR $SG473+12 | mov al, BYTE PTR $SG473+12
mov BYTE PTR _arr$[ebp+12], al | mov BYTE PTR _arr$[ebp+12], al
|
|
; EXTRA INSTRUCTION ============>> | mov ecx, DWORD PTR _ptr$[ebp]
|
|
mov al, BYTE PTR _arr$[ebp+7] | mov al, BYTE PTR [ecx+7]
mov esp, ebp | mov esp, ebp
pop ebp | pop ebp
ret 0 | ret 0
_foo ENDP |_foo ENDP
_TEXT ENDS |_TEXT ENDS
END |END
As expected, the array version is marginally more efficient than the pointer version. Another minor point to keep in the back of your mind.