optimization: move the code that handles a non-zero crc16_align out of the hot path, since it is a rare case

This commit is contained in:
Josh Coalson 2007-03-22 03:37:36 +00:00
parent ddddff6a56
commit 645620c24b

View File

@ -148,13 +148,7 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
;; edi unsigned FLAC__crc16_table[]
;; ebp br
test ecx, ecx ; switch(br->crc16_align) ...
jz .c0b0 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
cmp ecx, 8
je .c0b1
shr edx, 16
cmp ecx, 16
je .c0b2
jmp .c0b3
jnz .c0b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c0b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
movzx ebx, dl
mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
@ -177,7 +171,6 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
shl ax, 8 ; ax <- (crc<<8)
xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
mov [ebp + 24], eax ; br->read_crc <- crc
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
pop ecx
pop edi
@ -185,6 +178,25 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
xor ecx, ecx ; cbits = 0;
; }
jmp near .break1 ; goto break1;
;; this section relocated out of the way for performance
.c0b4:
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
cmp ecx, 8
je .c0b1
shr edx, 16
cmp ecx, 16
je .c0b2
jmp .c0b3
;; this section relocated out of the way for performance
.c1b4:
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
cmp ecx, 8
je .c1b1
shr edx, 16
cmp ecx, 16
je .c1b2
jmp .c1b3
.c1_next2: ; } else {
;; ecx cbits
@ -210,13 +222,7 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
;; edi unsigned FLAC__crc16_table[]
;; ebp br
test ecx, ecx ; switch(br->crc16_align) ...
jz .c1b0 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
cmp ecx, 8
je .c1b1
shr edx, 16
cmp ecx, 16
je .c1b2
jmp .c1b3
jnz .c1b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c1b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
movzx ebx, dl
mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
@ -239,7 +245,6 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
shl ax, 8 ; ax <- (crc<<8)
xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
mov [ebp + 24], eax ; br->read_crc <- crc
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
pop ecx
pop edi
@ -428,13 +433,7 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
;; edi unsigned FLAC__crc16_table[]
;; ebp br
test ecx, ecx ; switch(br->crc16_align) ...
jz .c2b0 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
cmp ecx, 8
je .c2b1
shr edx, 16
cmp ecx, 16
je .c2b2
jmp .c2b3
jnz .c2b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c2b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
movzx ebx, dl
mov cx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
@ -457,7 +456,6 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
shl ax, 8 ; ax <- (crc<<8)
xor ax, cx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
mov [ebp + 24], eax ; br->read_crc <- crc
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
pop eax
pop ecx
pop ebx
@ -473,6 +471,17 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
shld edi, eax, cl
; }
jmp .break2 ; goto break2;
;; this section relocated out of the way for performance
.c2b4:
mov [ebp + 28], dword 0 ; br->crc16_align <- 0
cmp ecx, 8
je .c2b1
shr edx, 16
cmp ecx, 16
je .c2b2
jmp .c2b3
.c2_next3: ; } else {
mov ecx, ebx ; cbits = parameter;
; uval <<= cbits;