Update theora to the latest v1.1, which improves performance and quality.

Untested yet...


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@33343 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Philippe Houdoin 2009-09-29 00:37:35 +00:00
parent ebbc6d886d
commit b501083349
84 changed files with 28001 additions and 15437 deletions

View File

@ -0,0 +1,16 @@
# Jamfile for the Theora media add-on.
SubDir HAIKU_TOP src add-ons media plugins theora ;
SetSubDirSupportedPlatformsBeOSCompatible ;
UsePrivateHeaders media shared ;
# Header search paths: the bundled libtheora directory, plus the sibling ogg
# plugin directory and its libogg subdirectory.
SubDirSysHdrs $(SUBDIR) libtheora ;
SubDirHdrs $(SUBDIR) .. ogg ;
SubDirSysHdrs $(SUBDIR) .. ogg libogg ;
# The add-on is built from a single source file and linked against the static
# libtheora.a and libogg.a libraries.
Addon theora :
theoraCodecPlugin.cpp
: libtheora.a libogg.a be libmedia.so $(TARGET_LIBSUPC++)
;
# Descend into the libtheora subdirectory, whose Jamfile defines libtheora.a.
SubInclude HAIKU_TOP src add-ons media plugins theora libtheora ;

View File

@ -1,28 +1,38 @@
# Jamfile for the static libtheora.a library.
# NOTE(review): this listing appears to interleave the removed and the added
# lines of the change (e.g. both -DVERSION=0.0 and -DVERSION=1.1.0 flags, and
# source names from both the old and the new library layout) -- confirm
# against the real file before relying on it.
SubDir HAIKU_TOP src add-ons media plugins theora libtheora ;
SubDirHdrs [ FDirName $(SUBDIR) .. .. ogg libogg ] ;
SubDirSysHdrs $(SUBDIR) ;
SubDirHdrs $(SUBDIR) .. .. ogg ;
SubDirSysHdrs $(SUBDIR) .. .. ogg libogg ;
SubDirCcFlags -DPACKAGE=\\\"libtheora\\\" -DVERSION=\\\"0.0\\\" ;
SubDirCcFlags -DPACKAGE=\\\"libtheora\\\" -DVERSION=\\\"1.1.0\\\" ;
SubDirCcFlags -D_REENTRANT -DPIC -DTRUE=true ;
# Extra sources that are only compiled when targeting x86; they are searched
# for in the x86 subdirectory.
local arch_sources ;
if $(TARGET_ARCH) = x86 {
arch_sources =
mmxidct.c
mmxfrag.c
mmxstate.c
x86state.c
;
SEARCH_SOURCE += [ FDirName $(SUBDIR) x86 ] ;
}
# The library itself: the portable sources plus any architecture-specific ones.
StaticLibrary libtheora.a :
blockmap.c
comment.c
dct.c
dct_decode.c
dct_encode.c
apiwrapper.c
bitpack.c
decapiwrapper.c
decinfo.c
decode.c
encode.c
frarray.c
frinit.c
huffman.c
dequant.c
fragment.c
huffdec.c
idct.c
mcomp.c
misc_common.c
pb.c
pp.c
info.c
internal.c
quant.c
reconstruct.c
scan.c
toplevel.c
state.c
$(arch_sources)
;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,166 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: apiwrapper.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "apiwrapper.h"
/*Returns the library version string by forwarding to th_version_string().*/
const char *theora_version_string(void){
  const char *version;
  version=th_version_string();
  return version;
}
/*Returns the library version number by forwarding to th_version_number().*/
ogg_uint32_t theora_version_number(void){
  ogg_uint32_t version;
  version=th_version_number();
  return version;
}
/*Puts a theora_info structure into its initial state: every byte is zeroed.
  _ci: The structure to initialize.*/
void theora_info_init(theora_info *_ci){
  memset(_ci,0,sizeof(theora_info));
}
/*Releases the API wrapper attached to a theora_info and zeroes the structure.
  The wrapper pointer is fetched before the structure is wiped; if a wrapper is
   present, its clear callback (when set) runs first and then the wrapper
   itself is freed.
  _ci: The structure to clear.*/
void theora_info_clear(theora_info *_ci){
  th_api_wrapper *wrapper=(th_api_wrapper *)_ci->codec_setup;
  memset(_ci,0,sizeof(*_ci));
  if(wrapper==NULL)return;
  if(wrapper->clear!=NULL)(*wrapper->clear)(wrapper);
  _ogg_free(wrapper);
}
/*Tears down a theora_state.
  Any decoder and/or encoder context attached to the state is cleared through
   its dispatch table, the associated theora_info (if any) is cleared, and
   finally the state structure itself is zeroed.
  _th: The state to clear.*/
void theora_clear(theora_state *_th){
  oc_state_dispatch_vtable *dispatch;
  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
  if(dispatch!=NULL)(*dispatch->clear)(_th);
  dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
  if(dispatch!=NULL)(*dispatch->clear)(_th);
  if(_th->i!=NULL)theora_info_clear(_th->i);
  memset(_th,0,sizeof(*_th));
}
/*Forwards a control request to whichever context is attached to the state.
  The decoder's dispatch table is preferred; the encoder's is used only when no
   decoder context is present.
  Return: The result of the context's control function, or TH_EINVAL when the
           state has neither a decoder nor an encoder context.*/
int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
  oc_state_dispatch_vtable *dispatch;
  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
  if(dispatch==NULL){
    dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
  }
  if(dispatch==NULL)return TH_EINVAL;
  return (*dispatch->control)(_th,_req,_buf,_buf_sz);
}
/*Converts a granule position to a frame index via the attached context.
  The decoder's dispatch table is preferred; the encoder's is used only when no
   decoder context is present.
  Return: The result of the context's granule_frame function, or -1 when the
           state has neither a decoder nor an encoder context.*/
ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
  oc_state_dispatch_vtable *dispatch;
  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
  if(dispatch==NULL){
    dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
  }
  if(dispatch==NULL)return -1;
  return (*dispatch->granule_frame)(_th,_gp);
}
/*Converts a granule position to a time via the attached context.
  The decoder's dispatch table is preferred; the encoder's is used only when no
   decoder context is present.
  Return: The result of the context's granule_time function, or -1 when the
           state has neither a decoder nor an encoder context.*/
double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
  oc_state_dispatch_vtable *dispatch;
  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
  if(dispatch==NULL){
    dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
  }
  if(dispatch==NULL)return -1;
  return (*dispatch->granule_time)(_th,_gp);
}
/*Copies the fields of a theora_info into the corresponding th_info fields.
  Note the naming difference between the two structures: theora_info's
   width/height become th_info's frame_width/frame_height, while theora_info's
   frame_width/frame_height and offset_x/offset_y become th_info's
   pic_width/pic_height and pic_x/pic_y.
  Unrecognized colorspace and pixel format values are mapped to
   TH_CS_UNSPECIFIED and TH_PF_RSVD respectively.
  _info: The structure to fill in.
  _ci:   The structure to read from.*/
void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
  /*Bitstream version.*/
  _info->version_major=_ci->version_major;
  _info->version_minor=_ci->version_minor;
  _info->version_subminor=_ci->version_subminor;
  /*Frame and picture geometry.*/
  _info->frame_width=_ci->width;
  _info->frame_height=_ci->height;
  _info->pic_width=_ci->frame_width;
  _info->pic_height=_ci->frame_height;
  _info->pic_x=_ci->offset_x;
  _info->pic_y=_ci->offset_y;
  /*Frame rate and pixel aspect ratio.*/
  _info->fps_numerator=_ci->fps_numerator;
  _info->fps_denominator=_ci->fps_denominator;
  _info->aspect_numerator=_ci->aspect_numerator;
  _info->aspect_denominator=_ci->aspect_denominator;
  /*Colorspace.*/
  if(_ci->colorspace==OC_CS_ITU_REC_470M){
    _info->colorspace=TH_CS_ITU_REC_470M;
  }
  else if(_ci->colorspace==OC_CS_ITU_REC_470BG){
    _info->colorspace=TH_CS_ITU_REC_470BG;
  }
  else _info->colorspace=TH_CS_UNSPECIFIED;
  /*Pixel format.*/
  if(_ci->pixelformat==OC_PF_420)_info->pixel_fmt=TH_PF_420;
  else if(_ci->pixelformat==OC_PF_422)_info->pixel_fmt=TH_PF_422;
  else if(_ci->pixelformat==OC_PF_444)_info->pixel_fmt=TH_PF_444;
  else _info->pixel_fmt=TH_PF_RSVD;
  /*Rate control settings.*/
  _info->target_bitrate=_ci->target_bitrate;
  _info->quality=_ci->quality;
  /*The granule shift is derived from the forced keyframe frequency, capped at
     31; a non-positive frequency yields a shift of 0.*/
  if(_ci->keyframe_frequency_force>0){
    _info->keyframe_granule_shift=
     OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1));
  }
  else _info->keyframe_granule_shift=0;
}
/*Forwards to th_packet_isheader() and returns its result unchanged.*/
int theora_packet_isheader(ogg_packet *_op){
  int is_header;
  is_header=th_packet_isheader(_op);
  return is_header;
}
/*Forwards to th_packet_iskeyframe() and returns its result unchanged.*/
int theora_packet_iskeyframe(ogg_packet *_op){
  int is_keyframe;
  is_keyframe=th_packet_iskeyframe(_op);
  return is_keyframe;
}
/*Computes the granule shift as oc_ilog(keyframe_frequency_force-1).
  Caveat (kept deliberately, to match the original function): the result is
   wrong when keyframe_frequency_force is not positive or is larger than 2**31
   (if your int is more than 32 bits).*/
int theora_granule_shift(theora_info *_ci){
  int shift;
  shift=oc_ilog(_ci->keyframe_frequency_force-1);
  return shift;
}
/*Forwards to th_comment_init(), treating the theora_comment as a th_comment.*/
void theora_comment_init(theora_comment *_tc){
  th_comment *tc;
  tc=(th_comment *)_tc;
  th_comment_init(tc);
}
/*Forwards to th_comment_query(), treating the theora_comment as a th_comment,
   and returns its result unchanged.*/
char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){
  th_comment *tc;
  tc=(th_comment *)_tc;
  return th_comment_query(tc,_tag,_count);
}
/*Forwards to th_comment_query_count(), treating the theora_comment as a
   th_comment, and returns its result unchanged.*/
int theora_comment_query_count(theora_comment *_tc,char *_tag){
  th_comment *tc;
  tc=(th_comment *)_tc;
  return th_comment_query_count(tc,_tag);
}
/*Forwards to th_comment_clear(), treating the theora_comment as a
   th_comment.*/
void theora_comment_clear(theora_comment *_tc){
  th_comment *tc;
  tc=(th_comment *)_tc;
  th_comment_clear(tc);
}
/*Forwards to th_comment_add(), treating the theora_comment as a th_comment.*/
void theora_comment_add(theora_comment *_tc,char *_comment){
  th_comment *tc;
  tc=(th_comment *)_tc;
  th_comment_add(tc,_comment);
}
/*Forwards to th_comment_add_tag(), treating the theora_comment as a
   th_comment.*/
void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){
  th_comment *tc;
  tc=(th_comment *)_tc;
  th_comment_add_tag(tc,_tag,_value);
}

View File

@ -0,0 +1,54 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $
********************************************************************/
/*Declarations shared by the pieces that implement the theora_* API on top of
   the th_* API.*/
#if !defined(_apiwrapper_H)
# define _apiwrapper_H (1)
# include <ogg/ogg.h>
# include <theora/theora.h>
# include "theora/theoradec.h"
# include "theora/theoraenc.h"
# include "internal.h"
typedef struct th_api_wrapper th_api_wrapper;
typedef struct th_api_info th_api_info;
/*Provide an entry point for the codec setup to clear itself in case we ever
want to break pieces off into a common base library shared by encoder and
decoder.
In addition, this makes several other pieces of the API wrapper cleaner.*/
typedef void (*oc_setup_clear_func)(void *_ts);
/*Generally only one of these pointers will be non-NULL in any given instance.
Technically we do not even really need this struct, since we should be able
to figure out which one from "context", but doing it this way makes sure we
don't flub it up.*/
struct th_api_wrapper{
/*Optional callback; theora_info_clear() invokes it (when non-NULL) with the
   wrapper itself as the argument, just before freeing the wrapper.*/
oc_setup_clear_func clear;
/*Setup information, decoder context and encoder context, respectively.*/
th_setup_info *setup;
th_dec_ctx *decode;
th_enc_ctx *encode;
};
/*Bundles an API wrapper together with a theora_info in a single structure.*/
struct th_api_info{
th_api_wrapper api;
theora_info info;
};
/*Copies the contents of a theora_info into a th_info.*/
void oc_theora_info2th_info(th_info *_info,const theora_info *_ci);
#endif

View File

@ -0,0 +1,111 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function: packing variable sized words into an octet stream
last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <string.h>
#include <stdlib.h>
#include "bitpack.h"
/*We're 'MSb' endian; if we write a word but read individual bits,
then we'll read the MSb first.*/
/*Prepares a pack buffer for reading _bytes bytes starting at _buf.
  All state is zeroed (empty window, no bits available, eof clear) and the
   read/stop pointers are set to delimit the input.*/
void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
  memset(_b,0,sizeof(oc_pack_buf));
  _b->stop=_buf+_bytes;
  _b->ptr=_buf;
}
/*Tops up the bit window from the input buffer and returns the new window.
  _b->ptr, _b->bits and possibly _b->eof are updated here, but _b->window is
   NOT written; the caller is expected to store the returned value.
  _bits: The number of bits the caller is about to consume.*/
static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
const unsigned char *ptr;
const unsigned char *stop;
oc_pb_window window;
int available;
window=_b->window;
available=_b->bits;
ptr=_b->ptr;
stop=_b->stop;
/*Load whole bytes just below the bits already in the window, for as long as
   there is room for another 8 bits and input remains.*/
while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){
available+=8;
window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
}
_b->ptr=ptr;
if(_bits>available){
if(ptr>=stop){
/*Out of input: flag it and pretend a huge number of bits is available so
   that subsequent reads do not come back here.*/
_b->eof=1;
available=OC_LOTS_OF_BITS;
}
/*Input remains but a whole byte no longer fits: OR in the top bits of the
   next byte without advancing past it.*/
else window|=*ptr>>(available&7);
}
_b->bits=available;
return window;
}
/*Returns the next bit of the stream without consuming it.
  If the window holds no bits it is refilled first and the refilled window is
   stored back into the buffer.*/
int oc_pack_look1(oc_pack_buf *_b){
  oc_pb_window win;
  win=_b->window;
  if(_b->bits<1){
    win=oc_pack_refill(_b,1);
    _b->window=win;
  }
  /*The next bit is the most significant bit of the window.*/
  return win>>(OC_PB_WINDOW_SIZE-1);
}
/*Discards one bit: the window is shifted left by one position and the count
   of available bits is decremented.*/
void oc_pack_adv1(oc_pack_buf *_b){
  _b->window=_b->window<<1;
  _b->bits-=1;
}
/*Reads and consumes _bits bits, returning them as an integer whose least
   significant bit is the last bit read.
  Here we assume that 0<=_bits&&_bits<=32.
  A request for 0 bits returns 0 without touching the buffer.*/
long oc_pack_read(oc_pack_buf *_b,int _bits){
  oc_pb_window win;
  long value;
  if(_bits==0)return 0;
  win=_b->window;
  if(_b->bits<_bits)win=oc_pack_refill(_b,_bits);
  value=win>>(OC_PB_WINDOW_SIZE-_bits);
  /*Drop the consumed bits in two steps, so that no single shift is by the
     full _bits amount.*/
  win<<=1;
  win<<=_bits-1;
  _b->bits-=_bits;
  _b->window=win;
  return value;
}
/*Reads and consumes a single bit, refilling the window first if it is
   empty.*/
int oc_pack_read1(oc_pack_buf *_b){
  oc_pb_window win;
  int bit;
  win=_b->window;
  if(_b->bits<1)win=oc_pack_refill(_b,1);
  /*The next bit is the most significant bit of the window.*/
  bit=win>>(OC_PB_WINDOW_SIZE-1);
  _b->window=win<<1;
  _b->bits--;
  return bit;
}
/*Returns -1 once a read has run past the end of the input; otherwise the
   number of bytes not yet loaded plus the number of whole bytes still held in
   the window.*/
long oc_pack_bytes_left(oc_pack_buf *_b){
  if(!_b->eof){
    return _b->stop-_b->ptr+(_b->bits>>3);
  }
  return -1;
}

View File

@ -0,0 +1,59 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function: packing variable sized words into an octet stream
last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
********************************************************************/
/*Interface of the MSb-first bit reader.*/
#if !defined(_bitpack_H)
# define _bitpack_H (1)
# include <limits.h>
/*The integer type used as the bit window.*/
typedef unsigned long oc_pb_window;
typedef struct oc_pack_buf oc_pack_buf;
/*The width of the bit window, in bits.*/
# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
/*This is meant to be a large, positive constant that can still be efficiently
loaded as an immediate (on platforms like ARM, for example).
Even relatively modest values like 100 would work fine.*/
# define OC_LOTS_OF_BITS (0x40000000)
struct oc_pack_buf{
/*Bits loaded but not yet consumed; the next bit to read is the MSb.*/
oc_pb_window window;
/*The next input byte to load into the window.*/
const unsigned char *ptr;
/*One past the last byte of the input.*/
const unsigned char *stop;
/*The number of bits currently available in the window (set to
   OC_LOTS_OF_BITS once the input is exhausted).*/
int bits;
/*Set to 1 when a read needed more bits than the input could supply.*/
int eof;
};
void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
int oc_pack_look1(oc_pack_buf *_b);
void oc_pack_adv1(oc_pack_buf *_b);
/*Here we assume 0<=_bits&&_bits<=32.*/
long oc_pack_read(oc_pack_buf *_b,int _bits);
int oc_pack_read1(oc_pack_buf *_b);
/* returns -1 for read beyond EOF, or the number of whole bytes available */
long oc_pack_bytes_left(oc_pack_buf *_b);
/*These two functions are implemented locally in huffdec.c*/
/*Read in bits without advancing the bitptr.
Here we assume 0<=_bits&&_bits<=32.*/
/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
#endif

View File

@ -1,38 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: block_inline.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* Permutation applied to a macro-block number before it is used as the second
 * index of a block map (see QuadMapToIndex1 and QuadMapToMBTopLeft). */
static ogg_int32_t MBOrderMap[4] = { 0, 2, 3, 1 };
/* Permutation applied to a block number, selected by the (unpermuted)
 * macro-block number, before it is used as the third index of a block map
 * (see QuadMapToIndex1). */
static ogg_int32_t BlockOrderMap1[4][4] = {
{ 0, 1, 3, 2 },
{ 0, 2, 3, 1 },
{ 0, 2, 3, 1 },
{ 3, 2, 0, 1 }
};
/* Looks up the block map entry for block B of macro-block MB in super-block
 * SB, after translating MB and B through the order tables. */
static ogg_int32_t QuadMapToIndex1( ogg_int32_t (*BlockMap)[4][4],
                                    ogg_uint32_t SB, ogg_uint32_t MB,
                                    ogg_uint32_t B ){
  ogg_int32_t mb_slot = MBOrderMap[MB];
  ogg_int32_t b_slot = BlockOrderMap1[MB][B];
  return BlockMap[SB][mb_slot][b_slot];
}
/* Looks up the block map entry for block 0 of macro-block MB in super-block
 * SB, after translating MB through the macro-block order table. */
static ogg_int32_t QuadMapToMBTopLeft( ogg_int32_t (*BlockMap)[4][4],
                                       ogg_uint32_t SB, ogg_uint32_t MB ){
  ogg_int32_t mb_slot = MBOrderMap[MB];
  return BlockMap[SB][mb_slot][0];
}

View File

@ -1,100 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: blockmap.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* Fills in the block map entries for one plane of HFrags x VFrags fragments.
 * The plane is walked in super-blocks of up to 4x4 fragments (clipped at the
 * right and bottom edges); super-blocks are numbered consecutively from
 * FirstSB and fragments are numbered in raster order from FirstFrag.
 * Within a super-block, MB selects the 2x2 quadrant (0 top-left, 1 top-right,
 * 2 bottom-left, 3 bottom-right) and B the position inside that quadrant
 * (same numbering). Entries for positions outside the plane are not written. */
static void CreateMapping ( ogg_int32_t (*BlockMap)[4][4],
ogg_uint32_t FirstSB,
ogg_uint32_t FirstFrag, ogg_uint32_t HFrags,
ogg_uint32_t VFrags ){
ogg_uint32_t i, j;
ogg_uint32_t xpos;
ogg_uint32_t ypos;
ogg_uint32_t SBrow, SBcol;
ogg_uint32_t SBRows, SBCols;
ogg_uint32_t MB, B;
ogg_uint32_t SB=FirstSB;
ogg_uint32_t FragIndex=FirstFrag;
/* Set Super-Block dimensions */
/* (fragment counts divided by 4, rounded up) */
SBRows = VFrags/4 + ( VFrags%4 ? 1 : 0 );
SBCols = HFrags/4 + ( HFrags%4 ? 1 : 0 );
/* Map each Super-Block */
for ( SBrow=0; SBrow<SBRows; SBrow++ ){
for ( SBcol=0; SBcol<SBCols; SBcol++ ){
/* Y co-ordinate of Super-Block in Block units */
ypos = SBrow<<2;
/* Map Blocks within this Super-Block */
for ( i=0; (i<4) && (ypos<VFrags); i++, ypos++ ){
/* X co-ordinate of Super-Block in Block units */
xpos = SBcol<<2;
for ( j=0; (j<4) && (xpos<HFrags); j++, xpos++ ){
/* Rows 0-1 belong to the upper macro-blocks, columns 0-1 to the left ones */
if ( i<2 ){
MB = ( j<2 ? 0 : 1 );
}else{
MB = ( j<2 ? 2 : 3 );
}
/* Odd rows are the lower blocks of a macro-block, odd columns the right ones */
if ( i%2 ){
B = ( j%2 ? 3 : 2 );
}else{
B = ( j%2 ? 1 : 0 );
}
/* Set mapping and move to next fragment */
BlockMap[SB][MB][B] = FragIndex++;
}
/* Move to first fragment in next row in Super-Block */
FragIndex += HFrags-j;
}
/* Move on to next Super-Block */
/* (undo the i row advances made above, but keep the j columns covered) */
SB++;
FragIndex -= i*HFrags-j;
}
/* Move to first Super-Block in next row */
FragIndex += 3*HFrags;
}
}
/* Builds the complete block map for three planes.
 * Every entry of the YSuperBlocks + 2*UVSuperBlocks super-blocks is first set
 * to -1, then CreateMapping() fills in one plane of HFrags x VFrags fragments
 * followed by two planes of (HFrags/2) x (VFrags/2) fragments. The second and
 * third planes start at fragment HFrags*VFrags and (HFrags*VFrags*5)/4. */
void CreateBlockMapping ( ogg_int32_t (*BlockMap)[4][4],
                          ogg_uint32_t YSuperBlocks,
                          ogg_uint32_t UVSuperBlocks,
                          ogg_uint32_t HFrags, ogg_uint32_t VFrags ) {
  ogg_uint32_t sb, mb, b;
  ogg_uint32_t total_sbs = YSuperBlocks + UVSuperBlocks * 2;
  ogg_uint32_t first_plane_frags = HFrags*VFrags;

  /* Mark every entry as unmapped. */
  for ( sb=0; sb<total_sbs; sb++ ){
    for ( mb=0; mb<4; mb++ ){
      for ( b=0; b<4; b++ ){
        BlockMap[sb][mb][b] = -1;
      }
    }
  }

  /* First plane. */
  CreateMapping ( BlockMap, 0, 0, HFrags, VFrags );
  /* Second plane. */
  CreateMapping ( BlockMap, YSuperBlocks, first_plane_frags,
                  HFrags/2, VFrags/2 );
  /* Third plane. */
  CreateMapping ( BlockMap, YSuperBlocks + UVSuperBlocks,
                  (first_plane_frags*5)/4, HFrags/2, VFrags/2 );
}

View File

@ -1,110 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: read/write and client interface for comment header packet
last mod: $Id: comment.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "encoder_internal.h"
/* Puts a theora_comment structure into its initial state: every byte is
 * zeroed. */
void theora_comment_init(theora_comment *tc){
  memset(tc,0,sizeof(theora_comment));
}
/* Appends a copy of the NUL-terminated string `comment` to the comment list.
 * Both per-comment arrays are grown to hold comments+2 entries, the string and
 * its length are stored at index `comments`, the count is incremented and the
 * string array is NULL-terminated.
 * If any allocation fails the comment is not added: the count is left
 * unchanged and the existing entries stay valid (the previous code
 * dereferenced the failed allocation and lost the old array pointer). */
void theora_comment_add(theora_comment *tc,char *comment){
  void   *grown;
  size_t  len;
  grown=_ogg_realloc(tc->user_comments,
                     (tc->comments+2)*sizeof(*tc->user_comments));
  if(grown==NULL)return;
  tc->user_comments=grown;
  grown=_ogg_realloc(tc->comment_lengths,
                     (tc->comments+2)*sizeof(*tc->comment_lengths));
  if(grown==NULL)return;
  tc->comment_lengths=grown;
  len=strlen(comment);
  tc->comment_lengths[tc->comments]=len;
  tc->user_comments[tc->comments]=_ogg_malloc(len+1);
  /* On failure this slot holds NULL, which still terminates the list. */
  if(tc->user_comments[tc->comments]==NULL)return;
  /* Copy the string including its terminating NUL. */
  memcpy(tc->user_comments[tc->comments],comment,len+1);
  tc->comments++;
  tc->user_comments[tc->comments]=NULL;
}
/* Adds the comment "tag=value" to the comment list.
 * A temporary "tag=value" string is built, handed to theora_comment_add()
 * (which stores its own copy) and then freed.
 * If the temporary string cannot be allocated, nothing is added. */
void theora_comment_add_tag(theora_comment *tc, char *tag, char *value){
  char *comment=_ogg_malloc(strlen(tag)+strlen(value)+2); /* +2 for = and \0 */
  if(comment==NULL)return;
  strcpy(comment, tag);
  strcat(comment, "=");
  strcat(comment, value);
  theora_comment_add(tc, comment);
  _ogg_free(comment);
}
/* This is more or less the same as strncasecmp - but that doesn't exist
 * everywhere, and this is a fairly trivial function, so we include it.
 * Compares the first n characters of s1 and s2, ignoring case.
 * Returns 0 when all n characters match and non-zero otherwise.
 * Each character is converted to unsigned char before being handed to
 * toupper(): passing a negative char value (e.g. a byte of a UTF-8 sequence
 * where char is signed) to toupper() is undefined behaviour. */
static int tagcompare(const char *s1, const char *s2, int n){
  int c;
  for(c=0;c<n;c++){
    if(toupper((unsigned char)s1[c]) != toupper((unsigned char)s2[c]))
      return !0;
  }
  return 0;
}
/* Returns the value of the count-th (0-based) comment whose name matches
 * `tag`, compared case-insensitively.
 * The returned pointer refers to the text after "tag=" inside the stored
 * comment; it is not a copy.
 * Returns NULL when there is no such comment, or when the temporary "tag="
 * string cannot be allocated. */
char *theora_comment_query(theora_comment *tc, char *tag, int count){
  long i;
  int found = 0;
  int taglen = strlen(tag)+1; /* +1 for the = we append */
  char *fulltag = _ogg_malloc(taglen+ 1);
  if(fulltag == NULL)
    return NULL;
  strcpy(fulltag, tag);
  strcat(fulltag, "=");
  for(i=0;i<tc->comments;i++){
    if(!tagcompare(tc->user_comments[i], fulltag, taglen)){
      if(count == found){
        _ogg_free(fulltag);
        /* We return a pointer to the data, not a copy */
        return tc->user_comments[i] + taglen;
      }
      else
        found++;
    }
  }
  _ogg_free(fulltag);
  return NULL; /* didn't find anything */
}
/* Returns the number of comments whose name matches `tag`, compared
 * case-insensitively.
 * Returns 0 when the temporary "tag=" string cannot be allocated. */
int theora_comment_query_count(theora_comment *tc, char *tag){
  int i,count=0;
  int taglen = strlen(tag)+1; /* +1 for the = we append */
  char *fulltag = _ogg_malloc(taglen+1);
  if(fulltag == NULL)
    return 0;
  strcpy(fulltag,tag);
  strcat(fulltag, "=");
  for(i=0;i<tc->comments;i++){
    if(!tagcompare(tc->user_comments[i], fulltag, taglen))
      count++;
  }
  _ogg_free(fulltag);
  return count;
}
/* Frees everything owned by a theora_comment (each stored comment, both
 * per-comment arrays and the vendor string) and zeroes the structure.
 * A NULL tc is ignored. The previous code guarded the frees with if(tc) but
 * then ran memset() on tc unconditionally, so a NULL tc still crashed. */
void theora_comment_clear(theora_comment *tc){
  long i;
  if(tc==NULL)return;
  if(tc->user_comments){
    for(i=0;i<tc->comments;i++)
      if(tc->user_comments[i])_ogg_free(tc->user_comments[i]);
    _ogg_free(tc->user_comments);
  }
  if(tc->comment_lengths)_ogg_free(tc->comment_lengths);
  if(tc->vendor)_ogg_free(tc->vendor);
  memset(tc,0,sizeof(*tc));
}

View File

@ -0,0 +1,226 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
CPU capability detection for x86 processors.
Originally written by Rudolf Marek.
function:
last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include "cpu.h"
#if !defined(OC_X86_ASM)
/*Variant used when OC_X86_ASM is not defined: no CPU features are
   reported.*/
static ogg_uint32_t oc_cpu_flags_get(void){
  const ogg_uint32_t no_flags=0;
  return no_flags;
}
#else
# if !defined(_MSC_VER)
# if defined(__amd64__)||defined(__x86_64__)
/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
compiling with -fPIC.*/
/*cpuid(_op,_eax,_ebx,_ecx,_edx): executes the cpuid instruction with _op in
   eax and stores the resulting eax, ebx, ecx and edx in the four lvalue
   arguments.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
"cpuid\n\t" \
:[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
:"a"(_op) \
:"cc" \
)
# else
/*On x86-32, not so much.*/
/*Same interface as the x86-64 variant, but the ebx result is returned through
   an arbitrary register operand: ebx is exchanged with that operand before and
   after the cpuid instruction, so ebx itself holds its original value again
   once the statement completes.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
"xchgl %%ebx,%[ebx]\n\t" \
"cpuid\n\t" \
"xchgl %%ebx,%[ebx]\n\t" \
:[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
:"a"(_op) \
:"cc" \
)
# endif
# else
/*Why does MSVC need this complicated rigamarole?
At this point I honestly do not care.*/
/*Visual C cpuid helper function.
For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
for VS2003 users, so we do it in inline assembler.*/
/*Executes cpuid function _op and stores the resulting eax, ebx, ecx and edx
   in _cpu_info[0], _cpu_info[1], _cpu_info[2] and _cpu_info[3].*/
static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
_asm{
mov eax,[_op]
mov esi,_cpu_info
cpuid
mov [esi+0],eax
mov [esi+4],ebx
mov [esi+8],ecx
mov [esi+12],edx
}
}
/*cpuid(_op,_eax,_ebx,_ecx,_edx) for Visual C: calls oc_cpuid_helper() and
   copies the four results into the lvalue arguments.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
do{ \
ogg_uint32_t cpu_info[4]; \
oc_cpuid_helper(cpu_info,_op); \
(_eax)=cpu_info[0]; \
(_ebx)=cpu_info[1]; \
(_ecx)=cpu_info[2]; \
(_edx)=cpu_info[3]; \
}while(0)
/*Probes whether bit 21 (0x200000) of the flags register can be toggled.
  *_ebx receives the original flags value and *_eax the value read back after
   an attempt to flip that bit; the caller treats equal values as "cpuid is not
   available".
  The original flags are restored before returning.*/
static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
_asm{
pushfd
pushfd
pop eax
mov ebx,eax
xor eax,200000h
push eax
popfd
pushfd
pop eax
popfd
mov ecx,_eax
mov [ecx],eax
mov ecx,_ebx
mov [ecx],ebx
}
}
# endif
/*Translates the edx/ecx feature words of an Intel-style cpuid query into
   OC_CPU_X86_* flags.
  Return: 0 if the MMX bit (edx bit 23) is clear, otherwise OC_CPU_X86_MMX
           combined with a flag for every further feature bit that is set.*/
static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
  ogg_uint32_t features;
  features=0;
  /*Without MMX nothing else is reported.*/
  if(_edx&0x00800000){
    features=OC_CPU_X86_MMX;
    if(_edx&0x02000000)features|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
    if(_edx&0x04000000)features|=OC_CPU_X86_SSE2;
    if(_ecx&0x00000001)features|=OC_CPU_X86_PNI;
    if(_ecx&0x00000100)features|=OC_CPU_X86_SSSE3;
    if(_ecx&0x00080000)features|=OC_CPU_X86_SSE4_1;
    if(_ecx&0x00100000)features|=OC_CPU_X86_SSE4_2;
  }
  return features;
}
/*Translates the edx/ecx feature words of an AMD-style extended cpuid query
   into OC_CPU_X86_* flags.
  Return: 0 if the MMX bit (edx bit 23) is clear, otherwise OC_CPU_X86_MMX
           combined with a flag for every further feature bit that is set.*/
static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
  ogg_uint32_t features;
  features=0;
  /*Without MMX nothing else is reported.*/
  if(_edx&0x00800000){
    features=OC_CPU_X86_MMX;
    if(_edx&0x00400000)features|=OC_CPU_X86_MMXEXT;
    if(_edx&0x80000000)features|=OC_CPU_X86_3DNOW;
    if(_edx&0x40000000)features|=OC_CPU_X86_3DNOWEXT;
    if(_ecx&0x00000040)features|=OC_CPU_X86_SSE4A;
    if(_ecx&0x00000800)features|=OC_CPU_X86_SSE5;
  }
  return features;
}
/*Detects the SIMD capabilities of the host x86 processor.
  On 32-bit builds the availability of the cpuid instruction is probed first.
  The vendor string returned by cpuid function 0 then selects how the feature
   words are obtained and parsed (Intel-style, AMD-style, or both).
  Return: A bitmask of OC_CPU_X86_* flags; 0 if cpuid is unavailable or the
           vendor is not recognized.*/
static ogg_uint32_t oc_cpu_flags_get(void){
  ogg_uint32_t flags;
  ogg_uint32_t eax;
  ogg_uint32_t ebx;
  ogg_uint32_t ecx;
  ogg_uint32_t edx;
# if !defined(__amd64__)&&!defined(__x86_64__)
  /*Not all x86-32 chips support cpuid, so we have to check.*/
#  if !defined(_MSC_VER)
  __asm__ __volatile__(
   "pushfl\n\t"
   "pushfl\n\t"
   "popl %[a]\n\t"
   "movl %[a],%[b]\n\t"
   "xorl $0x200000,%[a]\n\t"
   "pushl %[a]\n\t"
   "popfl\n\t"
   "pushfl\n\t"
   "popl %[a]\n\t"
   "popfl\n\t"
   :[a]"=r"(eax),[b]"=r"(ebx)
   :
   :"cc"
  );
#  else
  oc_detect_cpuid_helper(&eax,&ebx);
#  endif
  /*No cpuid.*/
  if(eax==ebx)return 0;
# endif
  cpuid(0,eax,ebx,ecx,edx);
  /*         l e t n          I e n i          u n e G*/
  if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
   /*      6 8 x M          T e n i          u n e G*/
   ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
    /*Intel, Transmeta (tested with Crusoe TM5800):*/
    cpuid(1,eax,ebx,ecx,edx);
    flags=oc_parse_intel_flags(edx,ecx);
  }
  /*              D M A c          i t n e          h t u A*/
  else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
   /*      C S N            y b   e          d o e G*/
   ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
    /*AMD, Geode:*/
    cpuid(0x80000000,eax,ebx,ecx,edx);
    if(eax<0x80000001)flags=0;
    else{
      cpuid(0x80000001,eax,ebx,ecx,edx);
      flags=oc_parse_amd_flags(edx,ecx);
    }
    /*Also check for SSE.*/
    cpuid(1,eax,ebx,ecx,edx);
    flags|=oc_parse_intel_flags(edx,ecx);
  }
  /*Technically some VIA chips can be configured in the BIOS to return any
     string here the user wants.
    There is a special detection method that can be used to identify such
     processors, but in my opinion, if the user really wants to change it, they
     deserve what they get.*/
  /*              s l u a          H r u a          t n e C*/
  else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
    /*VIA:*/
    /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
       chips (thanks to the engineers from Centaur Technology who provided it).
      These chips support Intel-like cpuid info.
      The C3-2 (Nehemiah) cores appear to, as well.*/
    cpuid(1,eax,ebx,ecx,edx);
    flags=oc_parse_intel_flags(edx,ecx);
    /*Ask for the highest supported extended function before testing it.
      The eax value left over from function 1 is the processor signature, not
       the extended function limit, so comparing it against 0x80000001 would
       never enable the check below.*/
    cpuid(0x80000000,eax,ebx,ecx,edx);
    if(eax>=0x80000001){
      /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
        We need to check this even if the Intel test succeeds to pick up 3DNow!
         support on these processors.
        Unlike actual AMD processors, we cannot _rely_ on this info, since
         some cores (e.g., the 693 stepping of the Nehemiah) claim to support
         this function, yet return edx=0, despite the Intel test indicating
         MMX support.
        Therefore the features detected here are strictly added to those
         detected by the Intel test.*/
      /*TODO: How about earlier chips?*/
      cpuid(0x80000001,eax,ebx,ecx,edx);
      /*Note: As of the C7, this function returns Intel-style extended feature
         flags, not AMD-style.
        Currently, this only defines bits 11, 20, and 29 (0x20100800), which
         do not conflict with any of the AMD flags we inspect.
        For the remaining bits, Intel tells us, "Do not count on their value",
         but VIA assures us that they will all be zero (at least on the C7 and
         Isaiah chips).
        In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
         (0xC0C00000) for something else, we will have to add code to detect
         the model to decide when it is appropriate to inspect them.*/
      flags|=oc_parse_amd_flags(edx,ecx);
    }
  }
  else{
    /*Implement me.*/
    flags=0;
  }
  return flags;
}
#endif

View File

@ -0,0 +1,34 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: cpu.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_x86_cpu_H)
# define _x86_cpu_H (1)
#include "internal.h"
/*CPU feature flags: each is a distinct bit, so they can be ORed together into
   a single mask (as the cpuid parsing code in cpu.c does).*/
#define OC_CPU_X86_MMX (1<<0)
#define OC_CPU_X86_3DNOW (1<<1)
#define OC_CPU_X86_3DNOWEXT (1<<2)
#define OC_CPU_X86_MMXEXT (1<<3)
#define OC_CPU_X86_SSE (1<<4)
#define OC_CPU_X86_SSE2 (1<<5)
#define OC_CPU_X86_PNI (1<<6)
#define OC_CPU_X86_SSSE3 (1<<7)
#define OC_CPU_X86_SSE4_1 (1<<8)
#define OC_CPU_X86_SSE4_2 (1<<9)
#define OC_CPU_X86_SSE4A (1<<10)
#define OC_CPU_X86_SSE5 (1<<11)
#endif

View File

@ -1,253 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dct.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* DCT multipliers: xCnSm is cos(n*pi/16) (equivalently sin(m*pi/16)) scaled
 * by 65536 and rounded to the nearest integer. */
static ogg_int32_t xC1S7 = 64277;
static ogg_int32_t xC2S6 = 60547;
static ogg_int32_t xC3S5 = 54491;
static ogg_int32_t xC4S4 = 46341;
static ogg_int32_t xC5S3 = 36410;
static ogg_int32_t xC6S2 = 25080;
static ogg_int32_t xC7S1 = 12785;
/* SIGNBITDUPPED(X) isolates bit 31 of X and shifts it right by 31 as a signed
 * value; presumably this yields all ones for negative X and zero otherwise
 * (relies on an arithmetic right shift -- TODO confirm for the target
 * compilers).
 * DOROUND(X) adds (SIGNBITDUPPED(X) & 0xffff) to X, i.e. 0xffff is added when
 * that mask is all ones, before the callers shift the result right by 16. */
#define SIGNBITDUPPED(X) ((signed )(((X) & 0x80000000)) >> 31)
#define DOROUND(X) ( (SIGNBITDUPPED(X) & (0xffff)) + (X) )
/* Forward DCT of an 8x8 block of 16-bit values.
 * InputData:  64 input values, read as 8 consecutive rows of 8.
 * OutputData: receives the 64 output coefficients.
 * The transform is done in two passes: the first transforms every row of
 * InputData into the 32-bit scratch array InterData, the second transforms
 * every column of InterData into OutputData.
 * Every product with one of the xCnSm multipliers is passed through DOROUND
 * and then shifted right by 16 to remove the 65536 scale factor. */
void fdct_short ( ogg_int16_t * InputData, ogg_int16_t * OutputData ){
int loop;
ogg_int32_t is07, is12, is34, is56;
ogg_int32_t is0734, is1256;
ogg_int32_t id07, id12, id34, id56;
ogg_int32_t irot_input_x, irot_input_y;
ogg_int32_t icommon_product1; /* Re-used product (c4s4 * (s12 - s56)). */
ogg_int32_t icommon_product2; /* Re-used product (c4s4 * (d12 + d56)). */
ogg_int32_t temp1, temp2; /* intermediate variable for computation */
ogg_int32_t InterData[64];
ogg_int32_t *ip = InterData;
ogg_int16_t * op = OutputData;
/* First pass: one iteration per input row; results go to InterData. */
for (loop = 0; loop < 8; loop++){
/* Pre calculate some common sums and differences. */
is07 = InputData[0] + InputData[7];
is12 = InputData[1] + InputData[2];
is34 = InputData[3] + InputData[4];
is56 = InputData[5] + InputData[6];
id07 = InputData[0] - InputData[7];
id12 = InputData[1] - InputData[2];
id34 = InputData[3] - InputData[4];
id56 = InputData[5] - InputData[6];
is0734 = is07 + is34;
is1256 = is12 + is56;
/* Pre-Calculate some common product terms. */
icommon_product1 = xC4S4*(is12 - is56);
icommon_product1 = DOROUND(icommon_product1);
icommon_product1>>=16;
icommon_product2 = xC4S4*(id12 + id56);
icommon_product2 = DOROUND(icommon_product2);
icommon_product2>>=16;
/* Outputs 0 and 4. */
ip[0] = (xC4S4*(is0734 + is1256));
ip[0] = DOROUND(ip[0]);
ip[0] >>= 16;
ip[4] = (xC4S4*(is0734 - is1256));
ip[4] = DOROUND(ip[4]);
ip[4] >>= 16;
/* Define inputs to rotation for outputs 2 and 6 */
irot_input_x = id12 - id56;
irot_input_y = is07 - is34;
/* Apply rotation for outputs 2 and 6. */
temp1=xC6S2*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC2S6*irot_input_y;
temp2=DOROUND(temp2);
temp2>>=16;
ip[2] = temp1 + temp2;
temp1=xC6S2*irot_input_y;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC2S6*irot_input_x ;
temp2=DOROUND(temp2);
temp2>>=16;
ip[6] = temp1 -temp2 ;
/* Define inputs to rotation for outputs 1 and 7 */
irot_input_x = icommon_product1 + id07;
irot_input_y = -( id34 + icommon_product2 );
/* Apply rotation for outputs 1 and 7. */
temp1=xC1S7*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC7S1*irot_input_y;
temp2=DOROUND(temp2);
temp2>>=16;
ip[1] = temp1 - temp2;
temp1=xC7S1*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC1S7*irot_input_y ;
temp2=DOROUND(temp2);
temp2>>=16;
ip[7] = temp1 + temp2 ;
/* Define inputs to rotation for outputs 3 and 5 */
irot_input_x = id07 - icommon_product1;
irot_input_y = id34 - icommon_product2;
/* Apply rotation for outputs 3 and 5. */
temp1=xC3S5*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC5S3*irot_input_y ;
temp2=DOROUND(temp2);
temp2>>=16;
ip[3] = temp1 - temp2 ;
temp1=xC5S3*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC3S5*irot_input_y;
temp2=DOROUND(temp2);
temp2>>=16;
ip[5] = temp1 + temp2;
/* Increment data pointer for next row. */
InputData += 8 ;
ip += 8; /* advance pointer to next row */
}
/* Performed DCT on rows, now transform the columns */
/* Second pass: one iteration per column; elements of a column are 8 entries
 * apart in both InterData and OutputData. */
ip = InterData;
for (loop = 0; loop < 8; loop++){
/* Pre calculate some common sums and differences. */
is07 = ip[0 * 8] + ip[7 * 8];
is12 = ip[1 * 8] + ip[2 * 8];
is34 = ip[3 * 8] + ip[4 * 8];
is56 = ip[5 * 8] + ip[6 * 8];
id07 = ip[0 * 8] - ip[7 * 8];
id12 = ip[1 * 8] - ip[2 * 8];
id34 = ip[3 * 8] - ip[4 * 8];
id56 = ip[5 * 8] - ip[6 * 8];
is0734 = is07 + is34;
is1256 = is12 + is56;
/* Pre-Calculate some common product terms. */
icommon_product1 = xC4S4*(is12 - is56) ;
icommon_product2 = xC4S4*(id12 + id56) ;
icommon_product1 = DOROUND(icommon_product1);
icommon_product2 = DOROUND(icommon_product2);
icommon_product1>>=16;
icommon_product2>>=16;
/* Outputs 0 and 4. */
temp1 = xC4S4*(is0734 + is1256) ;
temp2 = xC4S4*(is0734 - is1256) ;
temp1 = DOROUND(temp1);
temp2 = DOROUND(temp2);
temp1>>=16;
temp2>>=16;
op[0*8] = (ogg_int16_t) temp1;
op[4*8] = (ogg_int16_t) temp2;
/* Define inputs to rotation for outputs 2 and 6 */
irot_input_x = id12 - id56;
irot_input_y = is07 - is34;
/* Apply rotation for outputs 2 and 6. */
temp1=xC6S2*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC2S6*irot_input_y;
temp2=DOROUND(temp2);
temp2>>=16;
op[2*8] = (ogg_int16_t) (temp1 + temp2);
temp1=xC6S2*irot_input_y;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC2S6*irot_input_x ;
temp2=DOROUND(temp2);
temp2>>=16;
op[6*8] = (ogg_int16_t) (temp1 -temp2) ;
/* Define inputs to rotation for outputs 1 and 7 */
irot_input_x = icommon_product1 + id07;
irot_input_y = -( id34 + icommon_product2 );
/* Apply rotation for outputs 1 and 7. */
temp1=xC1S7*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC7S1*irot_input_y;
temp2=DOROUND(temp2);
temp2>>=16;
op[1*8] = (ogg_int16_t) (temp1 - temp2);
temp1=xC7S1*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC1S7*irot_input_y ;
temp2=DOROUND(temp2);
temp2>>=16;
op[7*8] = (ogg_int16_t) (temp1 + temp2);
/* Define inputs to rotation for outputs 3 and 5 */
irot_input_x = id07 - icommon_product1;
irot_input_y = id34 - icommon_product2;
/* Apply rotation for outputs 3 and 5. */
temp1=xC3S5*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC5S3*irot_input_y ;
temp2=DOROUND(temp2);
temp2>>=16;
op[3*8] = (ogg_int16_t) (temp1 - temp2) ;
temp1=xC5S3*irot_input_x;
temp1=DOROUND(temp1);
temp1>>=16;
temp2=xC3S5*irot_input_y;
temp2=DOROUND(temp2);
temp2>>=16;
op[5*8] = (ogg_int16_t) (temp1 + temp2);
/* Increment data pointer for next column. */
ip ++;
op ++;
}
}

View File

@ -0,0 +1,31 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
/*Definitions shared by the forward and inverse DCT transforms.*/
/*Include guard: everything below is defined at most once per translation
   unit.*/
#if !defined(_dct_H)
# define _dct_H (1)
/*Fixed-point DCT basis constants.
  OC_CnSm is cos(n*pi/16), which equals sin(m*pi/16) because n+m==8, scaled by
   65536 and rounded to the nearest integer.
  NOTE(review): ogg_int32_t is not declared in this header; it presumably comes
   from a header the including file pulls in first -- confirm.*/
#define OC_C1S7 ((ogg_int32_t)64277)
#define OC_C2S6 ((ogg_int32_t)60547)
#define OC_C3S5 ((ogg_int32_t)54491)
#define OC_C4S4 ((ogg_int32_t)46341)
#define OC_C5S3 ((ogg_int32_t)36410)
#define OC_C6S2 ((ogg_int32_t)25080)
#define OC_C7S1 ((ogg_int32_t)12785)
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,557 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dct_encode.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include "encoder_internal.h"
/* Per-coding-mode flag table, indexed by the block's coding mode.
   TransformQuantizeBlock() takes the motion-compensated difference path
   (MotionBlockDifference) for every mode whose entry is non-zero. */
static int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
/* Builds the DCT input for one block as (source - reconstruction), and copies
   the matching 8 bytes of each row from the new frame into the old frame.

   FiltPtr            - source pixels, advanced by PixelsPerLine per row.
   ReconPtr           - reference pixels, advanced by ReconPixelsPerLine.
   DctInputPtr        - output, 8 differences per row, rows stored
                        BLOCK_HEIGHT_WIDTH entries apart.
   old_ptr1, new_ptr1 - destination / source of the per-row 8 byte copy. */
static void Sub8 (unsigned char *FiltPtr, unsigned char *ReconPtr,
                  ogg_int16_t *DctInputPtr, unsigned char *old_ptr1,
                  unsigned char *new_ptr1, ogg_uint32_t PixelsPerLine,
                  ogg_uint32_t ReconPixelsPerLine ) {
  int row;
  int col;

  for ( row = 0; row < BLOCK_HEIGHT_WIDTH; row++ ){
    /* One row of eight pixel differences. */
    for ( col = 0; col < 8; col++ ){
      DctInputPtr[col] =
        (ogg_int16_t)((int)FiltPtr[col] - (int)ReconPtr[col]);
    }

    /* Copy the row as two 32 bit words.
       NOTE(review): this relies on both pointers being suitably aligned for
       ogg_uint32_t access -- confirm against the buffer layout. */
    ((ogg_uint32_t*)old_ptr1)[0] = ((ogg_uint32_t*)new_ptr1)[0];
    ((ogg_uint32_t*)old_ptr1)[1] = ((ogg_uint32_t*)new_ptr1)[1];

    /* Step every pointer on to the next row. */
    DctInputPtr += BLOCK_HEIGHT_WIDTH;
    ReconPtr += ReconPixelsPerLine;
    FiltPtr += PixelsPerLine;
    old_ptr1 += PixelsPerLine;
    new_ptr1 += PixelsPerLine;
  }
}
/* Builds the DCT input for one INTRA block: each source pixel has 128
   subtracted so the data is centred on zero, which reduces the precision the
   DCT needs internally.  Also copies the matching 8 bytes of each row from
   the new frame into the old frame.

   FiltPtr            - source pixels, advanced by PixelsPerLine per row.
   DctInputPtr        - output, 8 values per row, rows stored
                        BLOCK_HEIGHT_WIDTH entries apart.
   old_ptr1, new_ptr1 - destination / source of the per-row 8 byte copy. */
static void Sub8_128 (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
                      unsigned char *old_ptr1, unsigned char *new_ptr1,
                      ogg_uint32_t PixelsPerLine ) {
  int row;
  int col;

  for ( row = 0; row < BLOCK_HEIGHT_WIDTH; row++ ){
    /* Raw image data, re-centred from 0..255 to -128..127. */
    for ( col = 0; col < 8; col++ ){
      DctInputPtr[col] = (ogg_int16_t)((int)FiltPtr[col] - 128);
    }

    /* Copy the row as two 32 bit words.
       NOTE(review): this relies on both pointers being suitably aligned for
       ogg_uint32_t access -- confirm against the buffer layout. */
    ((ogg_uint32_t*)old_ptr1)[0] = ((ogg_uint32_t*)new_ptr1)[0];
    ((ogg_uint32_t*)old_ptr1)[1] = ((ogg_uint32_t*)new_ptr1)[1];

    /* Step every pointer on to the next row. */
    DctInputPtr += BLOCK_HEIGHT_WIDTH;
    FiltPtr += PixelsPerLine;
    old_ptr1 += PixelsPerLine;
    new_ptr1 += PixelsPerLine;
  }
}
/* Builds the DCT input for one block as the source minus the truncated
   average of two reference blocks, and copies the matching 8 bytes of each
   row from the new frame into the old frame.

   FiltPtr              - source pixels, advanced by PixelsPerLine per row.
   ReconPtr1, ReconPtr2 - the two reference blocks, both advanced by
                          ReconPixelsPerLine per row.
   DctInputPtr          - output, 8 differences per row, rows stored
                          BLOCK_HEIGHT_WIDTH entries apart.
   old_ptr1, new_ptr1   - destination / source of the per-row 8 byte copy. */
static void Sub8Av2 (unsigned char *FiltPtr, unsigned char *ReconPtr1,
                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
                     unsigned char *old_ptr1, unsigned char *new_ptr1,
                     ogg_uint32_t PixelsPerLine,
                     ogg_uint32_t ReconPixelsPerLine ) {
  int row;
  int col;

  for ( row = 0; row < BLOCK_HEIGHT_WIDTH; row++ ){
    /* One row of eight differences against the two-sample average. */
    for ( col = 0; col < 8; col++ ){
      int predicted = ((int)ReconPtr1[col] + (int)ReconPtr2[col]) / 2;
      DctInputPtr[col] = (ogg_int16_t)((int)FiltPtr[col] - predicted);
    }

    /* Copy the row as two 32 bit words.
       NOTE(review): this relies on both pointers being suitably aligned for
       ogg_uint32_t access -- confirm against the buffer layout. */
    ((ogg_uint32_t*)old_ptr1)[0] = ((ogg_uint32_t*)new_ptr1)[0];
    ((ogg_uint32_t*)old_ptr1)[1] = ((ogg_uint32_t*)new_ptr1)[1];

    /* Step every pointer on to the next row. */
    DctInputPtr += BLOCK_HEIGHT_WIDTH;
    ReconPtr1 += ReconPixelsPerLine;
    ReconPtr2 += ReconPixelsPerLine;
    FiltPtr += PixelsPerLine;
    old_ptr1 += PixelsPerLine;
    new_ptr1 += PixelsPerLine;
  }
}
/* Converts a single coefficient into its token form.

   A value is written as a category token, followed for most categories by
   an "extra bits" entry holding the sign bit and the offset of the magnitude
   from the start of the category.  Magnitudes above 511 are coded as 511.

   DataValue    - the coefficient to tokenise.
   TokenListPtr - receives the one or two entries produced.

   Returns the number of entries written: 0 for a zero coefficient,
   1 for +/-1 and +/-2, otherwise 2. */
static unsigned char TokenizeDctValue (ogg_int16_t DataValue,
                                       ogg_uint32_t * TokenListPtr ){
  ogg_uint32_t Magnitude = abs( (ogg_int32_t)DataValue );
  int          Negative = ( DataValue < 0 );
  ogg_uint32_t Category;
  ogg_uint32_t CategoryMin;
  ogg_uint32_t SignBit;

  if ( DataValue == 0 ) return 0;

  /* +/-1 and +/-2 have dedicated single tokens. */
  if ( Magnitude == 1 ){
    TokenListPtr[0] = Negative ? MINUS_ONE_TOKEN : ONE_TOKEN;
    return 1;
  }
  if ( Magnitude == 2 ){
    TokenListPtr[0] = Negative ? MINUS_TWO_TOKEN : TWO_TOKEN;
    return 1;
  }

  /* Small values: the token itself carries the magnitude and the extra
     entry is just the sign. */
  if ( Magnitude <= MAX_SINGLE_TOKEN_VALUE ){
    TokenListPtr[0] = LOW_VAL_TOKENS + (Magnitude - DCT_VAL_CAT2_MIN);
    TokenListPtr[1] = Negative ? 1 : 0;
    return 2;
  }

  /* Remaining categories differ only in their token, their lowest
     magnitude and the position of the sign bit in the extra bits. */
  if ( Magnitude <= 8 ){
    Category = DCT_VAL_CATEGORY3;
    CategoryMin = DCT_VAL_CAT3_MIN;
    SignBit = 0x02;
  } else if ( Magnitude <= 12 ){
    Category = DCT_VAL_CATEGORY4;
    CategoryMin = DCT_VAL_CAT4_MIN;
    SignBit = 0x04;
  } else if ( Magnitude <= 20 ){
    Category = DCT_VAL_CATEGORY5;
    CategoryMin = DCT_VAL_CAT5_MIN;
    SignBit = 0x08;
  } else if ( Magnitude <= 36 ){
    Category = DCT_VAL_CATEGORY6;
    CategoryMin = DCT_VAL_CAT6_MIN;
    SignBit = 0x010;
  } else if ( Magnitude <= 68 ){
    Category = DCT_VAL_CATEGORY7;
    CategoryMin = DCT_VAL_CAT7_MIN;
    SignBit = 0x20;
  } else {
    /* Largest category; anything beyond 511 saturates at 511. */
    if ( Magnitude > 511 ) Magnitude = 511;
    Category = DCT_VAL_CATEGORY8;
    CategoryMin = DCT_VAL_CAT8_MIN;
    SignBit = 0x200;
  }

  TokenListPtr[0] = Category;
  TokenListPtr[1] = (Magnitude - CategoryMin) + (Negative ? SignBit : 0);
  return 2;
}
/* Converts a zero run followed by a small coefficient into one combined
   token plus an extra-bits entry.

   RunLength    - number of zeros preceding the coefficient.
   DataValue    - the coefficient that ends the run.
   TokenListPtr - receives the two entries produced.

   Returns 0 when DataValue is zero, otherwise 2.  For magnitudes above 3
   nothing is written to TokenListPtr even though 2 is still returned; the
   caller is expected never to pass such a value. */
static unsigned char TokenizeDctRunValue (unsigned char RunLength,
                                          ogg_int16_t DataValue,
                                          ogg_uint32_t * TokenListPtr ){
  ogg_uint32_t Magnitude = abs( (ogg_int32_t)DataValue );
  int          Negative = ( DataValue < 0 );

  if ( DataValue == 0 ) return 0;

  if ( Magnitude == 1 ){
    if ( RunLength <= 5 ){
      /* Runs of 1-5: the run length selects the token, extra bit is sign. */
      TokenListPtr[0] = DCT_RUN_CATEGORY1 + (RunLength - 1);
      TokenListPtr[1] = Negative ? 1 : 0;
    } else if ( RunLength <= 9 ){
      /* Runs of 6-9: bit 2 is the sign, the low bits are (run - 6). */
      TokenListPtr[0] = DCT_RUN_CATEGORY1B;
      TokenListPtr[1] = (Negative ? 0x04 : 0) + (RunLength - 6);
    } else {
      /* Runs of 10-17: bit 3 is the sign, the low bits are (run - 10). */
      TokenListPtr[0] = DCT_RUN_CATEGORY1C;
      TokenListPtr[1] = (Negative ? 0x08 : 0) + (RunLength - 10);
    }
  } else if ( Magnitude <= 3 ){
    if ( RunLength == 1 ){
      /* Bit 1 is the sign, bit 0 selects magnitude 2 or 3. */
      TokenListPtr[0] = DCT_RUN_CATEGORY2;
      TokenListPtr[1] = (Negative ? 0x02 : 0) + (Magnitude - 2);
    } else {
      /* Bit 2 is the sign, bit 1 the magnitude, bit 0 the run length. */
      TokenListPtr[0] = DCT_RUN_CATEGORY2 + 1;
      TokenListPtr[1] = (Negative ? 0x04 : 0) +
                        ((Magnitude - 2) << 1) + (RunLength - 2);
    }
  }
  /* Any other magnitude is bad input: no entries are written. */

  return 2;
}
/* Tokenises one block of BLOCK_SIZE coefficients.

   Zero runs are either merged with the following small coefficient into a
   combined run/value token, or written as a separate zero-run token followed
   by the value tokens.  A run reaching the end of the block becomes a single
   end-of-block token.

   RawData      - the BLOCK_SIZE coefficients to encode.
   TokenListPtr - receives the tokens and their extra-bits entries.

   Returns the number of entries written to TokenListPtr. */
static unsigned char TokenizeDctBlock (ogg_int16_t * RawData,
                                       ogg_uint32_t * TokenListPtr ) {
  ogg_uint32_t  pos = 0;
  unsigned char written = 0;
  unsigned char zeros;
  ogg_uint32_t  magnitude;

  while ( pos < BLOCK_SIZE ){
    /* Count the zeros in front of the next coefficient. */
    zeros = 0;
    while ( (pos < BLOCK_SIZE) && (RawData[pos] == 0) ){
      zeros++;
      pos++;
    }

    /* Nothing but zeros left: code end-of-block and stop. */
    if ( pos == BLOCK_SIZE ){
      TokenListPtr[written] = DCT_EOB_TOKEN;
      written++;
      break;
    }

    if ( zeros == 0 ){
      /* No run in front of this coefficient. */
      written += TokenizeDctValue( RawData[pos], &TokenListPtr[written] );
    } else {
      magnitude = abs(RawData[pos]);
      if ( ((magnitude == 1) && (zeros <= 17)) ||
           ((magnitude <= 3) && (zeros <= 3)) ) {
        /* Short run and small value: one composite token. */
        written += TokenizeDctRunValue( zeros, RawData[pos],
                                        &TokenListPtr[written] );
      } else {
        /* Otherwise code the run and the value separately. */
        TokenListPtr[written] =
          ( zeros <= 8 ) ? DCT_SHORT_ZRL_TOKEN : DCT_ZRL_TOKEN;
        written++;
        TokenListPtr[written] = zeros - 1;
        written++;
        written += TokenizeDctValue( RawData[pos], &TokenListPtr[written] );
      }
    }

    pos++;
  }

  return written;
}
/* Tokenises the quantised coefficients of one fragment and updates the
   encoder's token counters.

   cpi       - encoder instance; pb.CodingMode, FragTokenCounts[FragIndex]
               and TotTokenCount are updated.
   FragIndex - index of the fragment to tokenise.

   Returns the number of pixels coded, which is always BLOCK_SIZE. */
ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi,
                                ogg_int32_t FragIndex){
  ogg_uint32_t produced;

  /* Key frames are always INTRA; otherwise use the mode already chosen
     for this fragment. */
  if ( GetFrameType(&cpi->pb) != BASE_FRAME ){
    cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex];
  } else {
    cpi->pb.CodingMode = CODE_INTRA;
  }

  produced = TokenizeDctBlock( cpi->pb.QFragData[FragIndex],
                               cpi->pb.TokenList[FragIndex] );

  cpi->FragTokenCounts[FragIndex] = produced;
  cpi->TotTokenCount += produced;

  return BLOCK_SIZE;
}
/* Reports whether the first 64 entries of QuantList are all zero.
   Returns 1 if they are, 0 as soon as a non-zero entry is found. */
static int AllZeroDctData( Q_LIST_ENTRY * QuantList ){
  ogg_uint32_t remaining = 64;

  while ( remaining > 0 ){
    if ( *QuantList != 0 ) return 0;
    QuantList++;
    remaining--;
  }

  return 1;
}
/* Builds the DCT input for a motion-compensated block: the source block
   minus the prediction fetched from the reference frame at the fragment's
   motion vector.

   cpi                - encoder instance; cpi->MVector is set to the
                        fragment's motion vector.
   FiltPtr            - source pixels of the block.
   DctInputPtr        - receives the difference block.
   MvDevisor          - motion vector units per pixel (the caller passes 2
                        for the Y plane and 4 for the U/V planes).
   old_ptr1, new_ptr1 - passed through to Sub8/Sub8Av2 for the frame copy.
   FragIndex          - fragment being coded.
   PixelsPerLine      - stride of the source buffers.
   ReconPixelsPerLine - stride of the reference frame.

   The reference is the golden frame when pb.CodingMode is CODE_GOLDEN_MV
   and the last reconstructed frame otherwise. */
static void MotionBlockDifference (CP_INSTANCE * cpi, unsigned char * FiltPtr,
                                   ogg_int16_t *DctInputPtr, ogg_int32_t MvDevisor,
                                   unsigned char* old_ptr1, unsigned char* new_ptr1,
                                   ogg_uint32_t FragIndex,ogg_uint32_t PixelsPerLine,
                                   ogg_uint32_t ReconPixelsPerLine) {
  ogg_int32_t   MVOffset;        /* Whole-pixel motion vector offset */
  ogg_int32_t   ReconPtr2Offset; /* Offset of the second reference used for
                                    fractional-pixel prediction */
  unsigned char *ReconPtr1;      /* First reference block */
  unsigned char *ReconPtr2;      /* Second reference block */

  cpi->MVector.x = cpi->pb.FragMVect[FragIndex].x;
  cpi->MVector.y = cpi->pb.FragMVect[FragIndex].y;

  /* Whole-pixel part of the motion vector as a buffer offset. */
  MVOffset = ((cpi->MVector.y / MvDevisor) * ReconPixelsPerLine) +
    (cpi->MVector.x / MvDevisor);

  /* Any fractional component is treated as a half pixel: the second
     reference lies one pixel further along each axis that has a remainder,
     in the direction of the vector ( 0->0, 1/4->1/2, 1/2->1/2, 3/4->1/2 ). */
  ReconPtr2Offset = 0;
  if ( cpi->MVector.x % MvDevisor ) {
    if ( cpi->MVector.x > 0 )
      ReconPtr2Offset += 1;
    else
      ReconPtr2Offset -= 1;
  }
  if ( cpi->MVector.y % MvDevisor ) {
    if ( cpi->MVector.y > 0 )
      ReconPtr2Offset += ReconPixelsPerLine;
    else
      ReconPtr2Offset -= ReconPixelsPerLine;
  }

  /* Pick the reference frame for this coding mode. */
  if ( cpi->pb.CodingMode==CODE_GOLDEN_MV ) {
    ReconPtr1 = &cpi->
      pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]];
  } else {
    ReconPtr1 = &cpi->
      pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]];
  }
  ReconPtr1 += MVOffset;
  ReconPtr2 = ReconPtr1 + ReconPtr2Offset;

  if ( ReconPtr2Offset == 0 ){
    /* The vector is exactly pixel aligned: single reference. */
    Sub8( FiltPtr, ReconPtr1, DctInputPtr, old_ptr1, new_ptr1,
          PixelsPerLine, ReconPixelsPerLine );
  } else {
    /* Fractional vector: average two references.  Only two samples are
       used even for the diagonal case. */
    Sub8Av2(FiltPtr, ReconPtr1,ReconPtr2,DctInputPtr, old_ptr1,
            new_ptr1, PixelsPerLine, ReconPixelsPerLine );
  }
}
/* Produces the quantised DCT coefficients for one fragment.

   The function forms the block to transform (raw pixels for INTRA, a
   difference against a reference block otherwise), runs the forward DCT and
   quantises the result into pb.QFragData[FragIndex].  An INTER_NO_MV block
   whose coefficients all quantise to zero is removed from
   pb.display_fragments.

   cpi           - encoder instance.
   FragIndex     - fragment to process; indices below pb.YPlaneFragments
                   belong to the Y plane, the rest to the U/V planes.
   PixelsPerLine - stride of the source buffers. */
void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex,
                             ogg_uint32_t PixelsPerLine ) {
  unsigned char *new_ptr1;    /* Block position in the current frame */
  unsigned char *old_ptr1;    /* Block position in the old frame */
  unsigned char *FiltPtr;     /* Block position in the filtered source */
  ogg_int16_t   *DctInputPtr; /* Buffer holding the input to the DCT */
  ogg_uint32_t  ReconPixelsPerLine; /* Stride of the reference frames */
  unsigned char *ReconPtr1;   /* Reference block for unmoved prediction */
  ogg_int32_t   MvDevisor;    /* Motion vector resolution (2 = 1/2 pixel
                                 for Y, 4 = 1/4 pixel for U/V) */
  int           IsLuma;       /* Non-zero for a Y plane fragment */

  new_ptr1 = &cpi->yuv1ptr[cpi->pb.pixel_index_table[FragIndex]];
  old_ptr1 = &cpi->yuv0ptr[cpi->pb.pixel_index_table[FragIndex]];
  FiltPtr = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
  DctInputPtr = cpi->DCTDataBuffer;

  /* Plane specific values. */
  IsLuma = ( FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments );
  if ( IsLuma ){
    ReconPixelsPerLine = cpi->pb.YStride;
    MvDevisor = 2;
  }else{
    ReconPixelsPerLine = cpi->pb.UVStride;
    MvDevisor = 4;
  }

  /* Key frames are always INTRA; otherwise use the mode already chosen
     for this fragment. */
  if ( GetFrameType(&cpi->pb) == BASE_FRAME ) {
    cpi->pb.CodingMode = CODE_INTRA;
  }else{
    cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex];
  }

  /* Select the quantiser matrix: per-plane for INTRA, shared for inter. */
  if ( cpi->pb.CodingMode != CODE_INTRA )
    select_Inter_quantiser(&cpi->pb);
  else if ( IsLuma )
    select_Y_quantiser(&cpi->pb);
  else
    select_UV_quantiser(&cpi->pb);

  /* Fill the DCT input buffer according to the coding mode. */
  if ( ModeUsesMC[cpi->pb.CodingMode] ){
    MotionBlockDifference(cpi, FiltPtr, DctInputPtr, MvDevisor,
                          old_ptr1, new_ptr1, FragIndex, PixelsPerLine,
                          ReconPixelsPerLine);
  } else if ( (cpi->pb.CodingMode==CODE_INTER_NO_MV ) ||
              ( cpi->pb.CodingMode==CODE_USING_GOLDEN ) ) {
    /* Prediction from the co-located block of the reference frame. */
    if ( cpi->pb.CodingMode==CODE_INTER_NO_MV ) {
      ReconPtr1 = &cpi->
        pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]];
    } else {
      ReconPtr1 = &cpi->
        pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]];
    }
    Sub8( FiltPtr, ReconPtr1, DctInputPtr, old_ptr1, new_ptr1,
          PixelsPerLine, ReconPixelsPerLine );
  } else if ( cpi->pb.CodingMode==CODE_INTRA ) {
    Sub8_128(FiltPtr, DctInputPtr, old_ptr1, new_ptr1, PixelsPerLine);
  }

  /* Forward 2D DCT, then quantise the transform output. */
  fdct_short( cpi->DCTDataBuffer, cpi->DCT_codes );
  quantize ( &cpi->pb, cpi->DCT_codes, cpi->pb.QFragData[FragIndex] );

  /* An unmoved inter block with no residual does not need coding. */
  if ( (cpi->pb.CodingMode == CODE_INTER_NO_MV) &&
       ( AllZeroDctData(cpi->pb.QFragData[FragIndex]) ) ) {
    cpi->pb.display_fragments[FragIndex] = 0;
  }
}

View File

@ -0,0 +1,193 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "apiwrapper.h"
#include "decint.h"
#include "theora/theoradec.h"
/*Releases whatever the API wrapper owns (the setup info and the decoder
   context, when present) and then zeroes the wrapper.*/
static void th_dec_api_clear(th_api_wrapper *_api){
  if(_api->setup!=NULL){
    th_setup_free(_api->setup);
  }
  if(_api->decode!=NULL){
    th_decode_free(_api->decode);
  }
  memset(_api,0,sizeof(*_api));
}
/*Clears the info struct attached to the state, if there is one, and then
   zeroes the state itself.*/
static void theora_decode_clear(theora_state *_td){
  theora_info *ci;
  ci=_td->i;
  if(ci!=NULL){
    theora_info_clear(ci);
  }
  memset(_td,0,sizeof(*_td));
}
/*Forwards a control request to th_decode_ctl() on the decoder context held
   in the state's API wrapper, and returns its result.*/
static int theora_decode_control(theora_state *_td,int _req,
 void *_buf,size_t _buf_sz){
  th_api_wrapper *api;
  api=(th_api_wrapper *)_td->i->codec_setup;
  return th_decode_ctl(api->decode,_req,_buf,_buf_sz);
}
/*Forwards to th_granule_frame() on the decoder context held in the state's
   API wrapper, and returns its result for granule position _gp.*/
static ogg_int64_t theora_decode_granule_frame(theora_state *_td,
 ogg_int64_t _gp){
  th_api_wrapper *api;
  api=(th_api_wrapper *)_td->i->codec_setup;
  return th_granule_frame(api->decode,_gp);
}
/*Forwards to th_granule_time() on the decoder context held in the state's
   API wrapper, and returns its result for granule position _gp.*/
static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
  th_api_wrapper *api;
  api=(th_api_wrapper *)_td->i->codec_setup;
  return th_granule_time(api->decode,_gp);
}
/*Dispatch table holding the decoder-side implementations of the four state
   operations: clear, control, granule-to-frame and granule-to-time.
  theora_decode_init() stores the address of this table in the
   internal_decode member of the theora_state it initializes.*/
static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
(oc_state_clear_func)theora_decode_clear,
(oc_state_control_func)theora_decode_control,
(oc_state_granule_frame_func)theora_decode_granule_frame,
(oc_state_granule_time_func)theora_decode_granule_time,
};
/*Copies the fields of a th_info struct into a legacy theora_info struct.
  The two APIs name the sizes differently: the th_info frame size becomes
   width/height, and the th_info picture size and offset become
   frame_width/frame_height and offset_x/offset_y.
  Color spaces and pixel formats with no legacy equivalent map to
   OC_CS_UNSPECIFIED and OC_PF_RSVD respectively.
  _ci:   The legacy info struct to fill in.
  _info: The struct to convert from.*/
static void th_info2theora_info(theora_info *_ci,const th_info *_info){
  _ci->version_major=_info->version_major;
  _ci->version_minor=_info->version_minor;
  _ci->version_subminor=_info->version_subminor;
  _ci->width=_info->frame_width;
  _ci->height=_info->frame_height;
  _ci->frame_width=_info->pic_width;
  _ci->frame_height=_info->pic_height;
  _ci->offset_x=_info->pic_x;
  _ci->offset_y=_info->pic_y;
  _ci->fps_numerator=_info->fps_numerator;
  _ci->fps_denominator=_info->fps_denominator;
  _ci->aspect_numerator=_info->aspect_numerator;
  _ci->aspect_denominator=_info->aspect_denominator;
  switch(_info->colorspace){
    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
  }
  switch(_info->pixel_fmt){
    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
    default:_ci->pixelformat=OC_PF_RSVD;break;
  }
  _ci->target_bitrate=_info->target_bitrate;
  _ci->quality=_info->quality;
  /*The granule shift is a 5-bit field in the info header, so it can be as
     large as 31.
    Shift an unsigned 32-bit one: shifting a signed int by 31 overflows, which
     is undefined behavior.*/
  _ci->keyframe_frequency_force=
   (ogg_uint32_t)1<<_info->keyframe_granule_shift;
}
/*Initializes a legacy decoder state from an info struct that has been filled
   in by theora_decode_header().
  _td: The state to initialize.
  _ci: The info struct describing the stream.
       Its codec_setup must point at the API wrapper created by
        theora_decode_header().
  Return: 0 on success, OC_FAULT if an argument is NULL, the headers were never
   parsed into _ci, or memory could not be allocated, or OC_EINVAL if the
   decoder context could not be created.*/
int theora_decode_init(theora_state *_td,theora_info *_ci){
  th_api_info    *apiinfo;
  th_api_wrapper *api;
  th_info         info;
  if(_td==NULL||_ci==NULL)return OC_FAULT;
  api=(th_api_wrapper *)_ci->codec_setup;
  /*Without the wrapper there is no setup info to decode with; fail the same
     way the other entry points do instead of dereferencing NULL.*/
  if(api==NULL)return OC_FAULT;
  /*Allocate our own combined API wrapper/theora_info struct.
    We put them both in one malloc'd block so that when the API wrapper is
     freed, the info struct goes with it.
    This avoids having to figure out whether or not we need to free the info
     struct in either theora_info_clear() or theora_clear().*/
  apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
  if(apiinfo==NULL)return OC_FAULT;
  /*Make our own copy of the info struct, since its lifetime should be
     independent of the one we were passed in.*/
  apiinfo->info=*_ci;
  /*Convert the info struct now instead of saving the one we decoded with
     theora_decode_header(), since the user might have modified values (i.e.,
     color space, aspect ratio, etc. can be specified from a higher level).
    The user also might be doing something "clever" with the header packets if
     they are not using an Ogg encapsulation.*/
  oc_theora_info2th_info(&info,_ci);
  /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy
     of the stuff it needs.*/
  apiinfo->api.decode=th_decode_alloc(&info,api->setup);
  if(apiinfo->api.decode==NULL){
    _ogg_free(apiinfo);
    return OC_EINVAL;
  }
  apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear;
  _td->internal_encode=NULL;
  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
  _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL;
  _td->granulepos=0;
  /*Point the state at our private copy, and the copy at our own wrapper.*/
  _td->i=&apiinfo->info;
  _td->i->codec_setup=&apiinfo->api;
  return 0;
}
/*Feeds one header packet to the decoder through the legacy API.
  _ci: The info struct being filled in; an API wrapper is attached to its
        codec_setup on first use.
  _cc: The comment struct being filled in.
  _op: The packet to decode.
  Return: 0 on success, OC_FAULT if the wrapper could not be allocated, or the
   negative error code reported by th_decode_headerin().*/
int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
  th_api_wrapper *wrapper;
  th_info         converted;
  int             status;
  wrapper=(th_api_wrapper *)_ci->codec_setup;
  /*Create the wrapper lazily.
    Unlike the one owned by a theora_state, this one is not bundled with a
     theora_info struct.*/
  if(wrapper==NULL){
    wrapper=(th_api_wrapper *)_ogg_calloc(1,sizeof(*wrapper));
    _ci->codec_setup=wrapper;
    if(wrapper==NULL)return OC_FAULT;
    wrapper->clear=(oc_setup_clear_func)th_dec_api_clear;
  }
  /*Rebuild the th_info from the caller's struct on every call rather than
     caching one between calls.
    A caller not using Ogg encapsulation may be doing something "clever" with
     the header packets, and this keeps that working.*/
  oc_theora_info2th_info(&converted,_ci);
  /*theora_comment and th_comment are laid out identically, so the cast is
     safe; keep the two in sync if either ever changes.*/
  status=th_decode_headerin(&converted,(th_comment *)_cc,&wrapper->setup,_op);
  /*The error codes of the two APIs share their values, so a failure can be
     passed straight through.
    That includes OC_NOTFORMAT, even though theora_decode_header() is not
     documented to return it.*/
  if(status<0)return status;
  th_info2theora_info(_ci,&converted);
  return 0;
}
/*Submits one data packet to the decoder through the legacy API.
  On success the granule position reported by th_decode_packetin() is stored
   in _td->granulepos.
  Return: 0 on success, OC_FAULT if the state is missing or not initialized,
   or OC_BADPACKET if th_decode_packetin() reports any error.*/
int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
  th_api_wrapper *wrapper;
  ogg_int64_t     granpos;
  if(_td==NULL||_td->i==NULL||_td->i->codec_setup==NULL){
    return OC_FAULT;
  }
  wrapper=(th_api_wrapper *)_td->i->codec_setup;
  /*Every failure is reported to legacy callers as a bad packet.*/
  if(th_decode_packetin(wrapper->decode,_op,&granpos)<0){
    return OC_BADPACKET;
  }
  _td->granulepos=granpos;
  return 0;
}
/*Retrieves the current decoded frame through the legacy API.
  _td:  The decoder state.
  _yuv: Filled in with the plane sizes, strides and data pointers when the
         frame could be retrieved; left untouched otherwise.
  Return: OC_FAULT if the state is missing or has no decoder context,
   otherwise the value returned by th_decode_ycbcr_out().*/
int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
  th_api_wrapper  *wrapper;
  th_dec_ctx      *dec;
  th_ycbcr_buffer  planes;
  int              status;
  if(_td==NULL||_td->i==NULL||_td->i->codec_setup==NULL){
    return OC_FAULT;
  }
  wrapper=(th_api_wrapper *)_td->i->codec_setup;
  dec=(th_dec_ctx *)wrapper->decode;
  if(dec==NULL)return OC_FAULT;
  status=th_decode_ycbcr_out(dec,planes);
  if(status<0)return status;
  /*Plane 0 describes luma.
    The legacy struct has a single set of chroma dimensions, taken from
     plane 1.*/
  _yuv->y_width=planes[0].width;
  _yuv->y_height=planes[0].height;
  _yuv->y_stride=planes[0].stride;
  _yuv->uv_width=planes[1].width;
  _yuv->uv_height=planes[1].height;
  _yuv->uv_stride=planes[1].stride;
  _yuv->y=planes[0].data;
  _yuv->u=planes[1].data;
  _yuv->v=planes[2].data;
  return status;
}

View File

@ -0,0 +1,246 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "decint.h"
/*Reads a series of octets from the pack buffer into a byte array.
  No checking is done to ensure the pack buffer contains enough data.
  _opb: The pack buffer to read the octets from.
  _buf: The byte array to store the unpacked bytes in.
  _len: The number of octets to unpack.*/
static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
  size_t i;
  for(i=0;i<_len;i++){
    _buf[i]=(char)oc_pack_read(_opb,8);
  }
}
/*Unpacks a 32-bit integer encoded by octets in little-endian form.
  The octets are combined in unsigned arithmetic: shifting the top octet left
   by 24 bits in a signed long overflows when long is 32 bits wide, which is
   undefined behavior.
  Where long is 32 bits, a value with its top bit set is expected to convert
   to a negative result, which callers reject with their len<0 checks.
  _opb: The pack buffer to read the four octets from.
  Return: The decoded value.*/
static long oc_unpack_length(oc_pack_buf *_opb){
  unsigned long ret;
  int           i;
  ret=0;
  for(i=0;i<4;i++){
    ret|=(unsigned long)oc_pack_read(_opb,8)<<(8*i);
  }
  return (long)ret;
}
/*Unpacks the body of the info header into a th_info struct.
  _opb:  The pack buffer, positioned just after the header's codec string.
  _info: The struct to fill in.
  Return: 0 on success, TH_EVERSION if the bitstream version is newer than
   this decoder supports, or TH_EBADHEADER if a field is invalid or the packet
   is too short.*/
static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
  /*The bitstream version comes first.*/
  _info->version_major=(unsigned char)oc_pack_read(_opb,8);
  _info->version_minor=(unsigned char)oc_pack_read(_opb,8);
  _info->version_subminor=(unsigned char)oc_pack_read(_opb,8);
  /*By spec, earlier minor versions and all subminor versions are accepted;
     only a newer major, or a newer minor of our own major, is refused.*/
  if(_info->version_major>TH_VERSION_MAJOR||
   (_info->version_major==TH_VERSION_MAJOR&&
   _info->version_minor>TH_VERSION_MINOR)){
    return TH_EVERSION;
  }
  /*The frame size is stored in units of 16, the picture region in pixels.*/
  _info->frame_width=(ogg_uint32_t)oc_pack_read(_opb,16)<<4;
  _info->frame_height=(ogg_uint32_t)oc_pack_read(_opb,16)<<4;
  _info->pic_width=(ogg_uint32_t)oc_pack_read(_opb,24);
  _info->pic_height=(ogg_uint32_t)oc_pack_read(_opb,24);
  _info->pic_x=(ogg_uint32_t)oc_pack_read(_opb,8);
  _info->pic_y=(ogg_uint32_t)oc_pack_read(_opb,8);
  _info->fps_numerator=(ogg_uint32_t)oc_pack_read(_opb,32);
  _info->fps_denominator=(ogg_uint32_t)oc_pack_read(_opb,32);
  /*The frame must be non-empty, the picture must lie inside it, and the frame
     rate must be a valid fraction.*/
  if(_info->frame_width==0||_info->frame_height==0||
   _info->pic_width+_info->pic_x>_info->frame_width||
   _info->pic_height+_info->pic_y>_info->frame_height||
   _info->fps_numerator==0||_info->fps_denominator==0){
    return TH_EBADHEADER;
  }
  /*Note: The sense of pic_y is inverted in what we pass back to the
     application compared to how it is stored in the bitstream.
    This is because the bitstream uses a right-handed coordinate system, while
     applications expect a left-handed one.*/
  _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
  _info->aspect_numerator=(ogg_uint32_t)oc_pack_read(_opb,24);
  _info->aspect_denominator=(ogg_uint32_t)oc_pack_read(_opb,24);
  _info->colorspace=(th_colorspace)oc_pack_read(_opb,8);
  _info->target_bitrate=(int)oc_pack_read(_opb,24);
  _info->quality=(int)oc_pack_read(_opb,6);
  _info->keyframe_granule_shift=(int)oc_pack_read(_opb,5);
  _info->pixel_fmt=(th_pixel_fmt)oc_pack_read(_opb,2);
  if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
  /*The last three bits must be zero, and we must not have run off the end of
     the packet.*/
  if(oc_pack_read(_opb,3)!=0||oc_pack_bytes_left(_opb)<0){
    return TH_EBADHEADER;
  }
  return 0;
}
/*Unpacks the body of the comment header into a th_comment struct.
  On failure the struct may be left partially filled in, with _tc->comments
   set to the number of fully stored comments; the caller is expected to
   release it (oc_dec_headerin() calls th_comment_clear()).
  _opb: The pack buffer, positioned just after the header's codec string.
  _tc:  The struct to fill in.
  Return: 0 on success, TH_EBADHEADER if a length is invalid or the packet is
   too short, or TH_EFAULT if memory could not be allocated.*/
static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
  long len;
  int  i;
  /*Read the vendor string.*/
  len=oc_unpack_length(_opb);
  if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
  _tc->vendor=_ogg_malloc((size_t)len+1);
  if(_tc->vendor==NULL)return TH_EFAULT;
  oc_unpack_octets(_opb,_tc->vendor,len);
  _tc->vendor[len]='\0';
  /*Read the user comments.*/
  _tc->comments=(int)oc_unpack_length(_opb);
  len=_tc->comments;
  /*Every comment needs at least its 4-byte length field, which bounds how
     many the rest of the packet can hold.*/
  if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
    _tc->comments=0;
    return TH_EBADHEADER;
  }
  _tc->comment_lengths=(int *)_ogg_malloc(
   _tc->comments*sizeof(_tc->comment_lengths[0]));
  _tc->user_comments=(char **)_ogg_malloc(
   _tc->comments*sizeof(_tc->user_comments[0]));
  /*A zero-sized allocation may legitimately return NULL, so only treat NULL
     as a failure when there are comments to store.
    Report no stored comments so that cleanup does not walk a missing array.*/
  if(_tc->comments>0&&
   (_tc->comment_lengths==NULL||_tc->user_comments==NULL)){
    _tc->comments=0;
    return TH_EFAULT;
  }
  for(i=0;i<_tc->comments;i++){
    len=oc_unpack_length(_opb);
    if(len<0||len>oc_pack_bytes_left(_opb)){
      /*Only the first i comments were stored.*/
      _tc->comments=i;
      return TH_EBADHEADER;
    }
    _tc->comment_lengths[i]=len;
    _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
    if(_tc->user_comments[i]==NULL){
      _tc->comments=i;
      return TH_EFAULT;
    }
    oc_unpack_octets(_opb,_tc->user_comments[i],len);
    _tc->user_comments[i][len]='\0';
  }
  return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
}
/*Unpacks the body of the setup header: the quantizer tables followed by the
   Huffman trees.
  Return: A negative value as soon as either part fails to unpack, otherwise
   the result of unpacking the Huffman trees.*/
static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
  int status;
  status=oc_quant_params_unpack(_opb,&_setup->qinfo);
  if(status>=0){
    status=oc_huff_trees_unpack(_opb,_setup->huff_tables);
  }
  return status;
}
/*Clears the contents of a setup info struct: first the quantization
   parameters, then the Huffman trees.
  The struct itself is not freed.*/
static void oc_setup_clear(th_setup_info *_setup){
  th_quant_info *quant;
  quant=&_setup->qinfo;
  oc_quant_params_clear(quant);
  oc_huff_trees_clear(_setup->huff_tables);
}
/*Decodes one header packet from an initialized pack buffer.
  _opb:   The pack buffer holding the packet.
  _info:  The info struct; filled in by the info header.
  _tc:    The comment struct; filled in by the comment header.
  _setup: Receives a newly allocated setup struct from the setup header.
  _op:    The packet itself, consulted for its b_o_s flag.
  Return: 3, 2 or 1 after successfully decoding the info, comment or setup
   header respectively, 0 when all three headers have been seen and this is a
   data packet, or a negative error code (TH_ENOTFORMAT, TH_EBADHEADER,
   TH_EFAULT, or whatever the header unpacker returned).*/
static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
 th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
  char buffer[6];
  long val;
  int  packtype;
  int  ret;
  val=oc_pack_read(_opb,8);
  packtype=(int)val;
  /*If we're at a data packet and we have received all three headers, we're
     done.
    _tc and _setup are only checked for NULL further down, so guard the
     dereferences here as well rather than crash on a NULL argument.*/
  if(!(packtype&0x80)&&_info->frame_width>0&&
   _tc!=NULL&&_tc->vendor!=NULL&&_setup!=NULL&&*_setup!=NULL){
    return 0;
  }
  /*Check the codec string.*/
  oc_unpack_octets(_opb,buffer,6);
  if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT;
  switch(packtype){
    /*Codec info header.*/
    case 0x80:{
      /*This should be the first packet, and we should not already be
         initialized.*/
      if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER;
      ret=oc_info_unpack(_opb,_info);
      if(ret<0)th_info_clear(_info);
      else ret=3;
    }break;
    /*Comment header.*/
    case 0x81:{
      if(_tc==NULL)return TH_EFAULT;
      /*We should have already decoded the info header, and should not yet
         have decoded the comment header.*/
      if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER;
      ret=oc_comment_unpack(_opb,_tc);
      if(ret<0)th_comment_clear(_tc);
      else ret=2;
    }break;
    /*Codec setup header.*/
    case 0x82:{
      oc_setup_info *setup;
      if(_tc==NULL||_setup==NULL)return TH_EFAULT;
      /*We should have already decoded the info header and the comment header,
         and should not yet have decoded the setup header.*/
      if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){
        return TH_EBADHEADER;
      }
      setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
      if(setup==NULL)return TH_EFAULT;
      ret=oc_setup_unpack(_opb,setup);
      if(ret<0){
        /*Discard the partially unpacked setup; *_setup is left untouched.*/
        oc_setup_clear(setup);
        _ogg_free(setup);
      }
      else{
        *_setup=setup;
        ret=1;
      }
    }break;
    default:{
      /*We don't know what this header is.*/
      return TH_EBADHEADER;
    }break;
  }
  return ret;
}
/*Decodes one header packet.
  This should be called repeatedly with the packets at the beginning of the
   stream until it returns 0.
  Return: TH_EBADHEADER if _op is NULL, TH_EFAULT if _info is NULL, otherwise
   the result of oc_dec_headerin() on the packet's contents.*/
int th_decode_headerin(th_info *_info,th_comment *_tc,
 th_setup_info **_setup,ogg_packet *_op){
  oc_pack_buf reader;
  if(_op==NULL){
    return TH_EBADHEADER;
  }
  if(_info==NULL){
    return TH_EFAULT;
  }
  /*Wrap the packet payload in a bit reader and hand it to the parser.*/
  oc_pack_readinit(&reader,_op->packet,_op->bytes);
  return oc_dec_headerin(&reader,_info,_tc,_setup,_op);
}
/*Frees a setup-info struct along with everything it owns.
  Passing NULL is a no-op.*/
void th_setup_free(th_setup_info *_setup){
  if(_setup==NULL)return;
  oc_setup_clear(_setup);
  _ogg_free(_setup);
}

View File

@ -0,0 +1,107 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <limits.h>
#if !defined(_decint_H)
# define _decint_H (1)
# include "theora/theoradec.h"
# include "internal.h"
# include "bitpack.h"
/*Internal (oc_-prefixed) names for the setup-info and decoder-context structs
defined below.*/
typedef struct th_setup_info oc_setup_info;
typedef struct th_dec_ctx oc_dec_ctx;
# include "huffdec.h"
# include "dequant.h"
/*Constants for the packet-in state machine specific to the decoder.*/
/*Next packet to read: Data packet.*/
#define OC_PACKET_DATA (0)
/*The contents of a decoded setup header: one Huffman table entry per
TH_NHUFFMAN_TABLES plus the quantization parameters.*/
struct th_setup_info{
/*The Huffman codes.*/
oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
/*The quantization parameters.*/
th_quant_info qinfo;
};
/*The decoder context.
The telemetry members at the end only exist when HAVE_CAIRO is defined, so the
size of this struct depends on the build configuration.*/
struct th_dec_ctx{
/*Shared encoder/decoder state.*/
oc_theora_state state;
/*Whether or not packets are ready to be emitted.
This takes on negative values while there are remaining header packets to
be emitted, reaches 0 when the codec is ready for input, and goes to 1
when a frame has been processed and a data packet is ready.*/
int packet_state;
/*Buffer in which to assemble packets.*/
oc_pack_buf opb;
/*Huffman decode trees.*/
oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
/*The index of the first token in each plane for each coefficient.
Indexed as [plane][coefficient].*/
ptrdiff_t ti0[3][64];
/*The number of outstanding EOB runs at the start of each coefficient in each
plane.
Indexed as [plane][coefficient].*/
ptrdiff_t eob_runs[3][64];
/*The DCT token lists.*/
unsigned char *dct_tokens;
/*The extra bits associated with DCT tokens.*/
unsigned char *extra_bits;
/*The number of dct tokens unpacked so far.*/
int dct_tokens_count;
/*The out-of-loop post-processing level.*/
int pp_level;
/*The DC scale used for out-of-loop deblocking.*/
int pp_dc_scale[64];
/*The sharpen modifier used for out-of-loop deringing.*/
int pp_sharp_mod[64];
/*The DC quantization index of each block.*/
unsigned char *dc_qis;
/*The variance of each block.*/
int *variances;
/*The storage for the post-processed frame buffer.*/
unsigned char *pp_frame_data;
/*Whether or not the post-processed frame buffer has space for chroma.*/
int pp_frame_state;
/*The buffer used for the post-processed frame.
Note that this is _not_ guaranteed to have the same strides and offsets as
the reference frame buffers.*/
th_ycbcr_buffer pp_frame_buf;
/*The striped decode callback function.*/
th_stripe_callback stripe_cb;
# if defined(HAVE_CAIRO)
/*Output metrics for debugging.*/
int telemetry;
int telemetry_mbmode;
int telemetry_mv;
int telemetry_qi;
int telemetry_bits;
int telemetry_frame_bytes;
int telemetry_coding_bytes;
int telemetry_mode_bytes;
int telemetry_mv_bytes;
int telemetry_qi_bytes;
int telemetry_dc_bytes;
unsigned char *telemetry_frame_data;
# endif
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,182 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dequant.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <ogg/ogg.h>
#include "dequant.h"
#include "decint.h"
/*Reads the quantization parameters out of a pack buffer.
  _opb:   The pack buffer to read from.
  _qinfo: The struct to fill in.
          Any qi_ranges entries that are not reached are left untouched, so
           they must already be in a state oc_quant_params_clear() can handle.
  Return: 0 on success, TH_EFAULT if an allocation fails, or TH_EBADHEADER if
           the data is inconsistent.
          On failure _qinfo may be partially constructed; the caller is
           responsible for cleaning it up.*/
int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){
  th_quant_base *mats;
  int            run_sizes[64];
  int            mat_idx[64];
  int            nmats;
  int            width;
  int            slot;
  int            ret;
  int            k;
  /*The loop filter limits: a 3-bit field width, then 64 values.*/
  width=(int)oc_pack_read(_opb,3);
  for(k=0;k<64;k++){
    _qinfo->loop_filter_limits[k]=(unsigned char)oc_pack_read(_opb,width);
  }
  /*The AC scale table: a 4-bit (field width minus one), then 64 values.*/
  width=(int)oc_pack_read(_opb,4)+1;
  for(k=0;k<64;k++){
    _qinfo->ac_scale[k]=(ogg_uint16_t)oc_pack_read(_opb,width);
  }
  /*The DC scale table, stored the same way.*/
  width=(int)oc_pack_read(_opb,4)+1;
  for(k=0;k<64;k++){
    _qinfo->dc_scale[k]=(ogg_uint16_t)oc_pack_read(_opb,width);
  }
  /*The pool of base matrices, 64 octets apiece.
    This is scratch storage: every exit past this point must free it.*/
  nmats=(int)oc_pack_read(_opb,9)+1;
  mats=(th_quant_base *)_ogg_malloc(nmats*sizeof(mats[0]));
  if(mats==NULL)return TH_EFAULT;
  for(slot=0;slot<nmats;slot++){
    for(k=0;k<64;k++){
      mats[slot][k]=(unsigned char)oc_pack_read(_opb,8);
    }
  }
  /*From here on, width is the number of bits in a base matrix index.*/
  width=oc_ilog(nmats-1);
  ret=0;
  /*One set of ranges for each (quantizer type, plane) pair.*/
  for(slot=0;slot<6;slot++){
    th_quant_ranges *dst;
    th_quant_base   *copies;
    int             *sizes_out;
    long             bits;
    int              nranges;
    int              covered;
    int              qti;
    int              pli;
    qti=slot/3;
    pli=slot%3;
    dst=&_qinfo->qi_ranges[qti][pli];
    /*Every set but the first starts with a flag; when it is clear, the set is
       a copy of an earlier one.*/
    if(slot>0&&!oc_pack_read1(_opb)){
      int src_qti;
      int src_pli;
      /*The default source is the immediately preceding set.*/
      src_qti=(slot-1)/3;
      src_pli=(slot-1)%3;
      /*The second quantizer type gets one more flag, which selects the same
         plane of the previous quantizer type instead.*/
      if(qti>0&&oc_pack_read1(_opb)){
        src_qti=qti-1;
        src_pli=pli;
      }
      /*A shallow copy: the pointers wind up shared, which is what
         oc_quant_params_clear() has to look out for.*/
      *dst=_qinfo->qi_ranges[src_qti][src_pli];
      continue;
    }
    /*Otherwise read a new list: a base matrix index, then alternating range
       sizes and base matrix indices until the ranges cover 63 steps.*/
    mat_idx[0]=(int)oc_pack_read(_opb,width);
    nranges=0;
    covered=0;
    while(covered<63){
      bits=oc_pack_read(_opb,oc_ilog(62-covered));
      run_sizes[nranges]=(int)bits+1;
      covered+=(int)bits+1;
      nranges++;
      mat_idx[nranges]=(int)oc_pack_read(_opb,width);
    }
    /*The ranges must add up to exactly 63.*/
    if(covered>63){
      ret=TH_EBADHEADER;
      break;
    }
    dst->nranges=nranges;
    dst->sizes=sizes_out=(int *)_ogg_malloc(nranges*sizeof(sizes_out[0]));
    if(sizes_out==NULL){
      ret=TH_EFAULT;
      break;
    }
    memcpy(sizes_out,run_sizes,nranges*sizeof(sizes_out[0]));
    /*There is one more base matrix than there are ranges.*/
    copies=(th_quant_base *)_ogg_malloc((nranges+1)*sizeof(copies[0]));
    if(copies==NULL){
      ret=TH_EFAULT;
      break;
    }
    dst->base_matrices=(const th_quant_base *)copies;
    /*Copy the referenced matrices out of the pool, last one first, rejecting
       any index that points outside of it.*/
    for(k=nranges;k>=0;k--){
      if(mat_idx[k]>=nmats){
        ret=TH_EBADHEADER;
        break;
      }
      memcpy(copies[k],mats[mat_idx[k]],sizeof(copies[k]));
    }
    if(ret!=0)break;
  }
  _ogg_free(mats);
  return ret;
}
/*Frees the sizes and base_matrices arrays referenced by _qinfo->qi_ranges.
  Several entries may share the same arrays, so the entries are visited from
   last to first, and a pointer is dropped instead of freed whenever an
   earlier entry still holds it.*/
void oc_quant_params_clear(th_quant_info *_qinfo){
  int idx;
  for(idx=5;idx>=0;idx--){
    th_quant_ranges *cur;
    int              qti;
    int              pli;
    qti=idx/3;
    pli=idx%3;
    cur=&_qinfo->qi_ranges[qti][pli];
    /*Drop anything shared with the immediately preceding entry.*/
    if(idx>0){
      const th_quant_ranges *prev;
      prev=&_qinfo->qi_ranges[(idx-1)/3][(idx-1)%3];
      if(cur->sizes==prev->sizes)cur->sizes=NULL;
      if(cur->base_matrices==prev->base_matrices)cur->base_matrices=NULL;
    }
    /*Drop anything shared with the same plane of the first quantizer type.*/
    if(qti>0){
      const th_quant_ranges *first;
      first=&_qinfo->qi_ranges[0][pli];
      if(cur->sizes==first->sizes)cur->sizes=NULL;
      if(cur->base_matrices==first->base_matrices)cur->base_matrices=NULL;
    }
    /*Whatever is left belongs to this entry alone.*/
    _ogg_free((void *)cur->sizes);
    _ogg_free((void *)cur->base_matrices);
  }
}

View File

@ -0,0 +1,27 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_dequant_H)
# define _dequant_H (1)
# include "quant.h"
# include "bitpack.h"
/*Reads the quantization parameters from _opb into _qinfo.
Returns 0 on success or a negative value (TH_EFAULT or TH_EBADHEADER) on
failure, in which case _qinfo may be partially constructed and the caller
is responsible for cleaning it up.*/
int oc_quant_params_unpack(oc_pack_buf *_opb,
th_quant_info *_qinfo);
/*Frees the arrays referenced by _qinfo->qi_ranges, taking care not to free
arrays shared between entries more than once.*/
void oc_quant_params_clear(th_quant_info *_qinfo);
#endif

View File

@ -0,0 +1,168 @@
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "apiwrapper.h"
#include "encint.h"
#include "theora/theoraenc.h"
/*Frees the encoder held by an API wrapper, if there is one, and then zeroes
   the whole wrapper.*/
static void th_enc_api_clear(th_api_wrapper *_api){
  if(_api->encode){
    th_encode_free(_api->encode);
  }
  memset(_api,0,sizeof(*_api));
}
/*Clears the info struct attached to a legacy encoder state, if there is one,
   and then zeroes the state itself.*/
static void theora_encode_clear(theora_state *_te){
  theora_info *info;
  info=_te->i;
  if(info!=NULL){
    theora_info_clear(info);
  }
  memset(_te,0,sizeof(*_te));
}
/*Forwards a legacy control request to th_encode_ctl() on the wrapped
   encoder.*/
static int theora_encode_control(theora_state *_te,int _req,
 void *_buf,size_t _buf_sz){
  th_api_wrapper *api;
  api=(th_api_wrapper *)_te->i->codec_setup;
  return th_encode_ctl(api->encode,_req,_buf,_buf_sz);
}
/*Forwards a granule position to th_granule_frame() on the wrapped encoder.*/
static ogg_int64_t theora_encode_granule_frame(theora_state *_te,
 ogg_int64_t _gp){
  th_api_wrapper *api;
  api=(th_api_wrapper *)_te->i->codec_setup;
  return th_granule_frame(api->encode,_gp);
}
/*Forwards a granule position to th_granule_time() on the wrapped encoder.*/
static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){
  th_api_wrapper *api;
  api=(th_api_wrapper *)_te->i->codec_setup;
  return th_granule_time(api->encode,_gp);
}
/*The encoder's implementations of the clear, control, granule_frame, and
granule_time operations, in that order.
theora_encode_init() stores a pointer to this table in the internal_encode
member of the theora_state.*/
static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={
(oc_state_clear_func)theora_encode_clear,
(oc_state_control_func)theora_encode_control,
(oc_state_granule_frame_func)theora_encode_granule_frame,
(oc_state_granule_time_func)theora_encode_granule_time,
};
/*Sets up a legacy-API encoder state on top of the new encoder.
  _te: The state to initialize.
  _ci: The requested encoder configuration; it is copied, not retained.
  Return: 0 on success, TH_EFAULT if the wrapper could not be allocated, or
           OC_EINVAL if th_encode_alloc() rejected the configuration.*/
int theora_encode_init(theora_state *_te,theora_info *_ci){
  th_api_info  *wrapper;
  th_info       new_info;
  ogg_uint32_t  kf_force;
  /*The API wrapper and our private theora_info live in one malloc'd block, so
     that freeing the wrapper takes the info struct with it.
    That way neither theora_info_clear() nor theora_clear() has to work out
     whether the info struct needs freeing.*/
  wrapper=(th_api_info *)_ogg_malloc(sizeof(*wrapper));
  if(wrapper==NULL)return TH_EFAULT;
  /*Keep our own copy of the caller's info struct: its lifetime must not be
     tied to the one that was passed in.*/
  wrapper->info=*_ci;
  oc_theora_info2th_info(&new_info,_ci);
  wrapper->api.encode=th_encode_alloc(&new_info);
  if(wrapper->api.encode==NULL){
    _ogg_free(wrapper);
    return OC_EINVAL;
  }
  wrapper->api.clear=(oc_setup_clear_func)th_enc_api_clear;
  /*Entry points for ABI compatibility with old decoder shared libs.*/
  _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL;
  _te->internal_decode=NULL;
  _te->granulepos=0;
  _te->i=&wrapper->info;
  _te->i->codec_setup=&wrapper->api;
  /*Apply the exact keyframe frequency that was asked for.*/
  if(_ci->keyframe_auto_p)kf_force=_ci->keyframe_frequency_force;
  else kf_force=_ci->keyframe_frequency;
  th_encode_ctl(wrapper->api.encode,TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
   &kf_force,sizeof(kf_force));
  /*TODO: Additional codec setup using the extra fields in theora_info.*/
  return 0;
}
/*Submits one frame through the legacy API.
  The yuv_buffer is repackaged as a th_ycbcr_buffer and handed to
   th_encode_ycbcr_in(); when that succeeds, the state's granule position is
   refreshed from the encoder.
  Return: The value returned by th_encode_ycbcr_in().*/
int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
  th_api_wrapper  *api;
  th_ycbcr_buffer  planes;
  int              status;
  int              pli;
  api=(th_api_wrapper *)_te->i->codec_setup;
  /*The luma plane.*/
  planes[0].width=_yuv->y_width;
  planes[0].height=_yuv->y_height;
  planes[0].stride=_yuv->y_stride;
  planes[0].data=_yuv->y;
  /*The two chroma planes have the same geometry and differ only in data.*/
  for(pli=1;pli<3;pli++){
    planes[pli].width=_yuv->uv_width;
    planes[pli].height=_yuv->uv_height;
    planes[pli].stride=_yuv->uv_stride;
  }
  planes[1].data=_yuv->u;
  planes[2].data=_yuv->v;
  status=th_encode_ycbcr_in(api->encode,planes);
  if(status>=0)_te->granulepos=api->encode->state.granpos;
  return status;
}
/*Retrieves the next packet through the legacy API by forwarding to
   th_encode_packetout() on the wrapped encoder.*/
int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
  return th_encode_packetout(
   ((th_api_wrapper *)_te->i->codec_setup)->encode,_last_p,_op);
}
/*Produces the info header packet through the legacy API.
  Return: 0 on success, TH_EINVAL if encoding has already started, or the
           negative value returned by th_encode_flushheader().*/
int theora_encode_header(theora_state *_te,ogg_packet *_op){
  th_api_wrapper *api;
  oc_enc_ctx     *enc;
  int             status;
  api=(th_api_wrapper *)_te->i->codec_setup;
  enc=api->encode;
  /*Headers cannot be requested once encoding has begun.*/
  if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){
    return TH_EINVAL;
  }
  /*Rewind the state machine so the next header flushed is the info packet.*/
  enc->packet_state=OC_PACKET_INFO_HDR;
  status=th_encode_flushheader(api->encode,NULL,_op);
  if(status<0)return status;
  return 0;
}
/*Produces the comment header packet through the legacy API.
  On success _op->packet points at a freshly malloc'd copy of the packet data;
   nothing in this function keeps a reference to it, so presumably the
   application is expected to free it.
  Return: 0 on success, TH_EFAULT if the copy could not be allocated (in which
           case _op->packet is set to NULL), or the negative value returned by
           oc_state_flushheader().*/
int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
  oggpack_buffer writer;
  int            hdr_state;
  int            status;
  hdr_state=OC_PACKET_COMMENT_HDR;
  oggpackB_writeinit(&writer);
  status=oc_state_flushheader(NULL,&hdr_state,&writer,NULL,NULL,
   th_version_string(),(th_comment *)_tc,_op);
  if(status>=0){
    void *copy;
    /*The pack buffer is cleared before we return, so the packet contents
       have to be copied out of it.
      This is nothing like the Vorbis API, and for some time the documentation
       wrongly claimed that libtheora owned this memory.*/
    copy=_ogg_malloc(_op->bytes);
    if(copy!=NULL){
      memcpy(copy,_op->packet,_op->bytes);
      _op->packet=copy;
      status=0;
    }
    else{
      _op->packet=NULL;
      status=TH_EFAULT;
    }
  }
  oggpack_writeclear(&writer);
  return status;
}
/*Produces the setup header packet through the legacy API.
  Return: 0 on success, TH_EINVAL if encoding has already started, or the
           negative value returned by th_encode_flushheader().*/
int theora_encode_tables(theora_state *_te,ogg_packet *_op){
  th_api_wrapper *api;
  oc_enc_ctx     *enc;
  int             status;
  api=(th_api_wrapper *)_te->i->codec_setup;
  enc=api->encode;
  /*Headers cannot be requested once encoding has begun.*/
  if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){
    return TH_EINVAL;
  }
  /*Rewind the state machine so the next header flushed is the setup packet.*/
  enc->packet_state=OC_PACKET_SETUP_HDR;
  status=th_encode_flushheader(api->encode,NULL,_op);
  if(status<0)return status;
  return 0;
}

View File

@ -0,0 +1,388 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: encfrag.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include "encint.h"
/*Calls the frag_sub implementation selected in the encoder's vtable.*/
void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  _enc->opt_vtable.frag_sub(_diff,_src,_ref,_ystride);
}
/*Portable C version of frag_sub.
  Stores the difference _src-_ref of an 8x8 block into _diff in row-major
   order.
  _ystride is the distance in bytes between rows of both _src and _ref.*/
void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src,
 const unsigned char *_ref,int _ystride){
  ogg_int16_t *out;
  int          row;
  int          col;
  out=_diff;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      *out++=(ogg_int16_t)(_src[col]-_ref[col]);
    }
    _src+=_ystride;
    _ref+=_ystride;
  }
}
/*Calls the frag_sub_128 implementation selected in the encoder's vtable.*/
void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
 const unsigned char *_src,int _ystride){
  _enc->opt_vtable.frag_sub_128(_diff,_src,_ystride);
}
/*Portable C version of frag_sub_128.
  Stores each pixel of an 8x8 block minus 128 into _diff in row-major order.
  _ystride is the distance in bytes between rows of _src.*/
void oc_enc_frag_sub_128_c(ogg_int16_t *_diff,
 const unsigned char *_src,int _ystride){
  ogg_int16_t *out;
  int          row;
  int          col;
  out=_diff;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      *out++=(ogg_int16_t)(_src[col]-128);
    }
    _src+=_ystride;
  }
}
/*Calls the frag_sad implementation selected in the encoder's vtable.*/
unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x,
 const unsigned char *_y,int _ystride){
  return _enc->opt_vtable.frag_sad(_x,_y,_ystride);
}
/*Portable C version of frag_sad.
  Returns the sum of absolute differences between two 8x8 blocks.
  _ystride is the distance in bytes between rows of both _src and _ref.*/
unsigned oc_enc_frag_sad_c(const unsigned char *_src,
 const unsigned char *_ref,int _ystride){
  unsigned total;
  int      row;
  int      col;
  total=0;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      total+=abs(_src[col]-_ref[col]);
    }
    _src+=_ystride;
    _ref+=_ystride;
  }
  return total;
}
/*Calls the frag_sad_thresh implementation selected in the encoder's
   vtable.*/
unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
 const unsigned char *_src,const unsigned char *_ref,int _ystride,
 unsigned _thresh){
  return _enc->opt_vtable.frag_sad_thresh(_src,_ref,_ystride,_thresh);
}
/*Portable C version of frag_sad_thresh.
  Accumulates the sum of absolute differences between two 8x8 blocks one row
   at a time, and stops after the first row that pushes the total above
   _thresh.
  The partial total is returned in that case, so any result larger than
   _thresh only indicates that the threshold was exceeded.*/
unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _thresh){
  unsigned total;
  int      row;
  int      col;
  total=0;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      total+=abs(_src[col]-_ref[col]);
    }
    if(total>_thresh)return total;
    _src+=_ystride;
    _ref+=_ystride;
  }
  return total;
}
/*Calls the frag_sad2_thresh implementation selected in the encoder's
   vtable.*/
unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
 const unsigned char *_src,const unsigned char *_ref1,
 const unsigned char *_ref2,int _ystride,unsigned _thresh){
  return _enc->opt_vtable.frag_sad2_thresh(_src,_ref1,_ref2,_ystride,
   _thresh);
}
/*Portable C version of frag_sad2_thresh.
  Like oc_enc_frag_sad_thresh_c(), except that the reference is the average
   (rounded down) of the two blocks _ref1 and _ref2.
  All three blocks share the row stride _ystride.*/
unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
 const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
 unsigned _thresh){
  unsigned total;
  int      row;
  int      col;
  total=0;
  for(row=0;row<8;row++){
    for(col=0;col<8;col++){
      int avg;
      avg=(_ref1[col]+_ref2[col])>>1;
      total+=abs(_src[col]-avg);
    }
    if(total>_thresh)return total;
    _src+=_ystride;
    _ref1+=_ystride;
    _ref2+=_ystride;
  }
  return total;
}
/*Applies an 8-point Hadamard transform to each row of the 8x8 difference
   _src-_ref.
  The output is stored transposed: coefficient k of row i is written to
   _buf[k*8+i].
  _ystride is the distance in bytes between rows of both _src and _ref.*/
static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
 const unsigned char *_ref,int _ystride){
  int row;
  for(row=0;row<8;row++){
    int in[8];
    int s1[8];
    int s2[8];
    int k;
    for(k=0;k<8;k++)in[k]=_src[k]-_ref[k];
    /*Hadamard stage 1: butterflies between elements 4 apart.*/
    for(k=0;k<4;k++){
      s1[k]=in[k]+in[k+4];
      s1[k+4]=in[k]-in[k+4];
    }
    /*Hadamard stage 2: butterflies between elements 2 apart.*/
    for(k=0;k<8;k+=4){
      s2[k]=s1[k]+s1[k+2];
      s2[k+2]=s1[k]-s1[k+2];
      s2[k+1]=s1[k+1]+s1[k+3];
      s2[k+3]=s1[k+1]-s1[k+3];
    }
    /*Hadamard stage 3: butterflies between neighbors, written out
       transposed.*/
    for(k=0;k<8;k+=2){
      _buf[k*8+row]=(ogg_int16_t)(s2[k]+s2[k+1]);
      _buf[(k+1)*8+row]=(ogg_int16_t)(s2[k]-s2[k+1]);
    }
    _src+=_ystride;
    _ref+=_ystride;
  }
}
/*Applies an 8-point Hadamard transform to each row of the 8x8 difference
   between _src and the average (rounded down) of _ref1 and _ref2.
  The output is stored transposed: coefficient k of row i is written to
   _buf[k*8+i].
  All three blocks share the row stride _ystride.*/
static void oc_diff_hadamard2(ogg_int16_t _buf[64],const unsigned char *_src,
 const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
  int row;
  for(row=0;row<8;row++){
    int in[8];
    int s1[8];
    int s2[8];
    int k;
    for(k=0;k<8;k++)in[k]=_src[k]-((_ref1[k]+_ref2[k])>>1);
    /*Hadamard stage 1: butterflies between elements 4 apart.*/
    for(k=0;k<4;k++){
      s1[k]=in[k]+in[k+4];
      s1[k+4]=in[k]-in[k+4];
    }
    /*Hadamard stage 2: butterflies between elements 2 apart.*/
    for(k=0;k<8;k+=4){
      s2[k]=s1[k]+s1[k+2];
      s2[k+2]=s1[k]-s1[k+2];
      s2[k+1]=s1[k+1]+s1[k+3];
      s2[k+3]=s1[k+1]-s1[k+3];
    }
    /*Hadamard stage 3: butterflies between neighbors, written out
       transposed.*/
    for(k=0;k<8;k+=2){
      _buf[k*8+row]=(ogg_int16_t)(s2[k]+s2[k+1]);
      _buf[(k+1)*8+row]=(ogg_int16_t)(s2[k]-s2[k+1]);
    }
    _src+=_ystride;
    _ref1+=_ystride;
    _ref2+=_ystride;
  }
}
/*Applies an 8-point Hadamard transform to each row of an 8x8 block of
   pixels.
  The output is stored transposed: coefficient k of row i is written to
   _buf[k*8+i].
  _ystride is the distance in bytes between rows of _src.*/
static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
 int _ystride){
  int row;
  for(row=0;row<8;row++){
    int s1[8];
    int s2[8];
    int k;
    /*Hadamard stage 1: butterflies between elements 4 apart.*/
    for(k=0;k<4;k++){
      s1[k]=_src[k]+_src[k+4];
      s1[k+4]=_src[k]-_src[k+4];
    }
    /*Hadamard stage 2: butterflies between elements 2 apart.*/
    for(k=0;k<8;k+=4){
      s2[k]=s1[k]+s1[k+2];
      s2[k+2]=s1[k]-s1[k+2];
      s2[k+1]=s1[k+1]+s1[k+3];
      s2[k+3]=s1[k+1]-s1[k+3];
    }
    /*Hadamard stage 3: butterflies between neighbors, written out
       transposed.*/
    for(k=0;k<8;k+=2){
      _buf[k*8+row]=(ogg_int16_t)(s2[k]+s2[k+1]);
      _buf[(k+1)*8+row]=(ogg_int16_t)(s2[k]-s2[k+1]);
    }
    _src+=_ystride;
  }
}
/*Applies an 8-point Hadamard transform to each row of _buf and sums the
   absolute values of the results.
  Rows are processed in order, and processing stops after the first row that
   pushes the total above _thresh, in which case the partial total is
   returned.*/
unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){
  unsigned total;
  int      row;
  total=0;
  for(row=0;row<8;row++){
    const ogg_int16_t *in;
    int                s1[8];
    int                s2[8];
    int                row_sum;
    int                k;
    in=_buf+row*8;
    /*Hadamard stage 1: butterflies between elements 4 apart.*/
    for(k=0;k<4;k++){
      s1[k]=in[k]+in[k+4];
      s1[k+4]=in[k]-in[k+4];
    }
    /*Hadamard stage 2: butterflies between elements 2 apart.*/
    for(k=0;k<8;k+=4){
      s2[k]=s1[k]+s1[k+2];
      s2[k+2]=s1[k]-s1[k+2];
      s2[k+1]=s1[k+1]+s1[k+3];
      s2[k+3]=s1[k+1]-s1[k+3];
    }
    /*Hadamard stage 3: only the magnitudes of the outputs are needed.*/
    row_sum=0;
    for(k=0;k<8;k+=2){
      row_sum+=abs(s2[k]+s2[k+1]);
      row_sum+=abs(s2[k]-s2[k+1]);
    }
    total+=row_sum;
    if(total>_thresh)break;
  }
  return total;
}
/*Calls the frag_satd_thresh implementation selected in the encoder's
   vtable.*/
unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
 const unsigned char *_src,const unsigned char *_ref,int _ystride,
 unsigned _thresh){
  return _enc->opt_vtable.frag_satd_thresh(_src,_ref,_ystride,_thresh);
}
/*Portable C version of frag_satd_thresh.
  Runs oc_diff_hadamard() on _src-_ref, then oc_hadamard_sad_thresh() on the
   result with the given threshold.*/
unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _thresh){
  ogg_int16_t coeffs[64];
  unsigned    satd;
  oc_diff_hadamard(coeffs,_src,_ref,_ystride);
  satd=oc_hadamard_sad_thresh(coeffs,_thresh);
  return satd;
}
/*Calls the frag_satd2_thresh implementation selected in the encoder's
   vtable.*/
unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
 const unsigned char *_src,const unsigned char *_ref1,
 const unsigned char *_ref2,int _ystride,unsigned _thresh){
  return _enc->opt_vtable.frag_satd2_thresh(_src,_ref1,_ref2,_ystride,
   _thresh);
}
/*Portable C version of frag_satd2_thresh.
  Runs oc_diff_hadamard2() on _src against the average of _ref1 and _ref2,
   then oc_hadamard_sad_thresh() on the result with the given threshold.*/
unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
 const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
 unsigned _thresh){
  ogg_int16_t coeffs[64];
  unsigned    satd;
  oc_diff_hadamard2(coeffs,_src,_ref1,_ref2,_ystride);
  satd=oc_hadamard_sad_thresh(coeffs,_thresh);
  return satd;
}
/*Calls the frag_intra_satd implementation selected in the encoder's
   vtable.*/
unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
 const unsigned char *_src,int _ystride){
  return _enc->opt_vtable.frag_intra_satd(_src,_ystride);
}
/*Portable C version of frag_intra_satd.
  Runs oc_intra_hadamard() on the block, sums the absolute transformed values
   with no threshold (UINT_MAX), and then subtracts the magnitude of the sum
   of the first eight entries of the intermediate buffer.*/
unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){
  ogg_int16_t coeffs[64];
  unsigned    satd;
  int         first_sum;
  int         k;
  oc_intra_hadamard(coeffs,_src,_ystride);
  satd=oc_hadamard_sad_thresh(coeffs,UINT_MAX);
  first_sum=0;
  for(k=0;k<8;k++)first_sum+=coeffs[k];
  return satd-abs(first_sum);
}
/*Calls the frag_copy2 implementation selected in the encoder's vtable.*/
void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
 const unsigned char *_src1,const unsigned char *_src2,int _ystride){
  _enc->opt_vtable.frag_copy2(_dst,_src1,_src2,_ystride);
}
/*Portable C version of frag_copy2.
  Writes the average (rounded down) of two 8x8 blocks into _dst.
  All three blocks share the row stride _ystride.*/
void oc_enc_frag_copy2_c(unsigned char *_dst,
 const unsigned char *_src1,const unsigned char *_src2,int _ystride){
  int row;
  for(row=0;row<8;row++){
    int col;
    for(col=0;col<8;col++){
      _dst[col]=(unsigned char)((_src1[col]+_src2[col])>>1);
    }
    _dst+=_ystride;
    _src1+=_ystride;
    _src2+=_ystride;
  }
}
/*Calls the frag_recon_intra implementation selected in the encoder's
   vtable.*/
void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
 unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){
  _enc->opt_vtable.frag_recon_intra(_dst,_ystride,_residue);
}
/*Calls the frag_recon_inter implementation selected in the encoder's
   vtable.*/
void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
  _enc->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue);
}

View File

@ -0,0 +1,121 @@
#include <stdlib.h>
#include <string.h>
#include "internal.h"
#include "enquant.h"
#include "huffenc.h"
/*Appends a run of octets to a pack buffer, 8 bits at a time.
  _opb: The pack buffer to append to.
  _buf: The bytes to append.
  _len: How many bytes to append; nothing is written if this is not
         positive.*/
static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){
  while(_len-->0)oggpackB_write(_opb,*_buf++,8);
}
/*Writes the info header for _state into _opb.*/
static void oc_flushheader_pack_info(oggpack_buffer *_opb,
 const oc_theora_state *_state){
  oggpackB_reset(_opb);
  /*The packet type: info header.*/
  oggpackB_write(_opb,0x80,8);
  /*The codec string.*/
  oc_pack_octets(_opb,"theora",6);
  /*The codec bitstream version.*/
  oggpackB_write(_opb,TH_VERSION_MAJOR,8);
  oggpackB_write(_opb,TH_VERSION_MINOR,8);
  oggpackB_write(_opb,TH_VERSION_SUB,8);
  /*The frame description.
    The frame dimensions are stored divided by 16.*/
  oggpackB_write(_opb,_state->info.frame_width>>4,16);
  oggpackB_write(_opb,_state->info.frame_height>>4,16);
  oggpackB_write(_opb,_state->info.pic_width,24);
  oggpackB_write(_opb,_state->info.pic_height,24);
  oggpackB_write(_opb,_state->info.pic_x,8);
  oggpackB_write(_opb,_state->info.pic_y,8);
  oggpackB_write(_opb,_state->info.fps_numerator,32);
  oggpackB_write(_opb,_state->info.fps_denominator,32);
  oggpackB_write(_opb,_state->info.aspect_numerator,24);
  oggpackB_write(_opb,_state->info.aspect_denominator,24);
  oggpackB_write(_opb,_state->info.colorspace,8);
  oggpackB_write(_opb,_state->info.target_bitrate,24);
  oggpackB_write(_opb,_state->info.quality,6);
  oggpackB_write(_opb,_state->info.keyframe_granule_shift,5);
  oggpackB_write(_opb,_state->info.pixel_fmt,2);
  /*Spare configuration bits.*/
  oggpackB_write(_opb,0,3);
}

/*Writes the comment header for _vendor and _tc into _opb.
  The 32-bit length and count fields are written with oggpack_write() rather
   than oggpackB_write().*/
static void oc_flushheader_pack_comment(oggpack_buffer *_opb,
 const char *_vendor,th_comment *_tc){
  int vendor_len;
  int ci;
  vendor_len=strlen(_vendor);
  oggpackB_reset(_opb);
  /*The packet type: comment header.*/
  oggpackB_write(_opb,0x81,8);
  /*The codec string.*/
  oc_pack_octets(_opb,"theora",6);
  /*The vendor string.*/
  oggpack_write(_opb,vendor_len,32);
  oc_pack_octets(_opb,_vendor,vendor_len);
  /*The user comments; a NULL entry is written as an empty comment.*/
  oggpack_write(_opb,_tc->comments,32);
  for(ci=0;ci<_tc->comments;ci++){
    if(_tc->user_comments[ci]==NULL){
      oggpack_write(_opb,0,32);
      continue;
    }
    oggpack_write(_opb,_tc->comment_lengths[ci],32);
    oc_pack_octets(_opb,_tc->user_comments[ci],_tc->comment_lengths[ci]);
  }
}

/*Writes the setup header for _qinfo and _codes into _opb.
  Return: The value returned by oc_huff_codes_pack().*/
static int oc_flushheader_pack_setup(oggpack_buffer *_opb,
 const th_quant_info *_qinfo,
 const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
  oggpackB_reset(_opb);
  /*The packet type: setup header.*/
  oggpackB_write(_opb,0x82,8);
  /*The codec string.*/
  oc_pack_octets(_opb,"theora",6);
  /*The quantizer tables, followed by the Huffman codes.*/
  oc_quant_params_pack(_opb,_qinfo);
  return oc_huff_codes_pack(_opb,_codes);
}

/*Emits the header packet selected by *_packet_state, if any, and advances
   the state.
  _state:        The codec state; only needed for the info header.
  _packet_state: Which header to emit next; incremented on success.
  _opb:          The pack buffer in which the packet is assembled.
  _qinfo:        The quantization parameters for the setup header.
  _codes:        The Huffman codes for the setup header.
  _vendor:       The vendor string for the comment header.
  _tc:           The comments for the comment header.
  _op:           The packet to fill in.
  Return: The new value of *_packet_state plus 3 if a header was emitted, 0 if
           *_packet_state does not select a header, TH_EFAULT if a required
           argument is NULL or the pack buffer has no storage, or the negative
           value returned by oc_huff_codes_pack().*/
int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
 oggpack_buffer *_opb,const th_quant_info *_qinfo,
 const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
 const char *_vendor,th_comment *_tc,ogg_packet *_op){
  unsigned char *data;
  int            first;
  int            err;
  if(_op==NULL)return TH_EFAULT;
  switch(*_packet_state){
    /*Codec info header.*/
    case OC_PACKET_INFO_HDR:{
      if(_state==NULL)return TH_EFAULT;
      oc_flushheader_pack_info(_opb,_state);
      first=1;
    }break;
    /*Comment header.*/
    case OC_PACKET_COMMENT_HDR:{
      if(_tc==NULL)return TH_EFAULT;
      oc_flushheader_pack_comment(_opb,_vendor,_tc);
      first=0;
    }break;
    /*Codec setup header.*/
    case OC_PACKET_SETUP_HDR:{
      err=oc_flushheader_pack_setup(_opb,_qinfo,_codes);
      /*This should never happen, because the tables are validated when they
         are set.
        If you see it, there's a good chance memory is being corrupted.*/
      if(err<0)return err;
      first=0;
    }break;
    /*No more headers to emit.*/
    default:return 0;
  }
  /*This is kind of fugly: we hand the user a buffer which they do not own.
    We will overwrite it when the next packet is output, so the user had
     better be done with it by then.
    Vorbis is little better: it hands back buffers that it will free the next
     time the headers are requested, or when the encoder is cleared.
    Hopefully libogg2 will make this much cleaner.*/
  data=oggpackB_get_buffer(_opb);
  /*If there's no packet, malloc failed while writing.*/
  if(data==NULL)return TH_EFAULT;
  _op->packet=data;
  _op->bytes=oggpackB_bytes(_opb);
  _op->b_o_s=first;
  _op->e_o_s=0;
  _op->granulepos=0;
  _op->packetno=*_packet_state+3;
  *_packet_state+=1;
  return *_packet_state+3;
}

View File

@ -0,0 +1,493 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_encint_H)
# define _encint_H (1)
# if defined(HAVE_CONFIG_H)
# include "config.h"
# endif
# include "theora/theoraenc.h"
# include "internal.h"
# include "ocintrin.h"
# include "mathops.h"
# include "enquant.h"
# include "huffenc.h"
/*# define OC_COLLECT_METRICS*/
/*A pair of motion vectors.*/
typedef oc_mv oc_mv2[2];
/*Short names for the encoder-internal structs.*/
typedef struct oc_enc_opt_vtable oc_enc_opt_vtable;
typedef struct oc_mb_enc_info oc_mb_enc_info;
typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
typedef struct oc_iir_filter oc_iir_filter;
typedef struct oc_frame_metrics oc_frame_metrics;
typedef struct oc_rc_state oc_rc_state;
/*The encoder context is the public th_enc_ctx struct.*/
typedef struct th_enc_ctx oc_enc_ctx;
typedef struct oc_token_checkpoint oc_token_checkpoint;
/*Constants for the packet-out state machine specific to the encoder.*/
/*Next packet to emit: Data packet, but none are ready yet.*/
#define OC_PACKET_EMPTY (0)
/*Next packet to emit: Data packet, and one is ready.*/
#define OC_PACKET_READY (1)
/*All features enabled.*/
#define OC_SP_LEVEL_SLOW (0)
/*Enable early skip.*/
#define OC_SP_LEVEL_EARLY_SKIP (1)
/*Disable motion compensation.*/
#define OC_SP_LEVEL_NOMC (2)
/*Maximum valid speed level.*/
#define OC_SP_LEVEL_MAX (2)
/*The bits used for each of the MB mode codebooks.*/
extern const unsigned char OC_MODE_BITS[2][OC_NMODES];
/*The bits used for each of the MV codebooks.*/
extern const unsigned char OC_MV_BITS[2][64];
/*The minimum value that can be stored in a SB run for each codeword.
The last entry is the upper bound on the length of a single SB run.*/
extern const ogg_uint16_t OC_SB_RUN_VAL_MIN[8];
/*The bits used for each SB run codeword.*/
extern const unsigned char OC_SB_RUN_CODE_NBITS[7];
/*The bits used for each block run length (starting with 1).*/
extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
/*Encoder specific functions with accelerated variants.
Each entry has the same signature as the corresponding oc_enc_frag_*()
wrapper, minus the leading encoder context argument.*/
struct oc_enc_opt_vtable{
/*Sum of absolute differences between two 8x8 blocks.*/
unsigned (*frag_sad)(const unsigned char *_src,
const unsigned char *_ref,int _ystride);
/*As frag_sad, but may stop early once the total exceeds _thresh.*/
unsigned (*frag_sad_thresh)(const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _thresh);
/*As frag_sad_thresh, against the average of two reference blocks.*/
unsigned (*frag_sad2_thresh)(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
/*Sum of absolute Hadamard-transformed differences, with a threshold.*/
unsigned (*frag_satd_thresh)(const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _thresh);
/*As frag_satd_thresh, against the average of two reference blocks.*/
unsigned (*frag_satd2_thresh)(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
/*Sum of absolute Hadamard-transformed values of a single block.*/
unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
/*8x8 difference between a source and a reference block.*/
void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
const unsigned char *_ref,int _ystride);
/*8x8 difference between a source block and the constant 128.*/
void (*frag_sub_128)(ogg_int16_t _diff[64],
const unsigned char *_src,int _ystride);
/*Average of two 8x8 blocks.*/
void (*frag_copy2)(unsigned char *_dst,
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
/*NOTE(review): presumably reconstructs an intra block from its residue;
no implementation is visible in this file section.*/
void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
const ogg_int16_t _residue[64]);
/*NOTE(review): presumably reconstructs an inter block from a predictor and
its residue; no implementation is visible in this file section.*/
void (*frag_recon_inter)(unsigned char *_dst,
const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
/*NOTE(review): presumably a forward 8x8 DCT from _x into _y; confirm
against the implementation.*/
void (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
};
void oc_enc_vtable_init(oc_enc_ctx *_enc);
/*Encoder-specific macroblock information.
This holds the motion-vector neighbor lists plus the motion vectors and error
values recorded for one macro block.*/
struct oc_mb_enc_info{
/*Neighboring macro blocks that have MVs available from the current frame.*/
unsigned cneighbors[4];
/*Neighboring macro blocks to use for MVs from the previous frame.*/
unsigned pneighbors[4];
/*The number of current-frame neighbors.*/
unsigned char ncneighbors;
/*The number of previous-frame neighbors.*/
unsigned char npneighbors;
/*Flags indicating which MB modes have been refined.*/
unsigned char refined;
/*Motion vectors for a macro block for the current frame and the
previous two frames.
Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which
can be used to estimate constant velocity and constant acceleration
predictors.
Uninitialized MVs are (0,0).*/
oc_mv2 analysis_mv[3];
/*Current unrefined analysis MVs.*/
oc_mv unref_mv[2];
/*Unrefined block MVs.*/
oc_mv block_mv[4];
/*Refined block MVs.*/
oc_mv ref_mv[4];
/*Minimum motion estimation error from the analysis stage.*/
ogg_uint16_t error[2];
/*MB error for half-pel refinement for each frame type.*/
unsigned satd[2];
/*Block error for half-pel refinement.*/
unsigned block_satd[4];
};
/*State machine to estimate the opportunity cost of coding a MB mode.
It tracks 8 mode coding schemes: scheme 0 is dynamic, the other 7 are
constant.*/
struct oc_mode_scheme_chooser{
/*Pointers to a list containing the index of each mode in the mode
alphabet used by each scheme.
The first entry points to the dynamic scheme0_ranks, while the remaining 7
point to the constant entries stored in OC_MODE_SCHEMES.*/
const unsigned char *mode_ranks[8];
/*The ranks for each mode when coded with scheme 0.
These are optimized so that the more frequent modes have lower ranks.*/
unsigned char scheme0_ranks[OC_NMODES];
/*The list of modes, sorted in descending order of frequency, that
corresponds to the ranks above.*/
unsigned char scheme0_list[OC_NMODES];
/*The number of times each mode has been chosen so far.*/
int mode_counts[OC_NMODES];
/*The list of mode coding schemes, sorted in ascending order of bit cost.*/
unsigned char scheme_list[8];
/*The number of bits used by each mode coding scheme.*/
ptrdiff_t scheme_bits[8];
};
/*Initializes the mode scheme chooser pointed to by _chooser.
NOTE(review): the definition is not visible here; presumably it sets up the
mode_ranks pointers and the other members of the structure before first
use.*/
void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
/*A 2nd order low-pass Bessel follower.
We use this for rate control because it has fast reaction time, but is
critically damped.
NOTE(review): the members are not documented in the original; the roles
given below are inferred from their names and the usual form of a 2nd order
IIR filter, and should be confirmed against the filter code.*/
struct oc_iir_filter{
/*Presumably the two filter coefficients.*/
ogg_int32_t c[2];
/*Presumably the filter gain; this is the only 64-bit member.*/
ogg_int64_t g;
/*Presumably the two most recent input samples.*/
ogg_int32_t x[2];
/*Presumably the two most recent output samples.*/
ogg_int32_t y[2];
};
/*The 2-pass metrics associated with a single frame.
The two bit-fields below add up to 32 bits (31+1).*/
struct oc_frame_metrics{
/*The log base 2 of the scale factor for this frame in Q24 format.*/
ogg_int32_t log_scale;
/*The number of application-requested duplicates of this frame.
Limited to 31 bits by the bit-field width.*/
unsigned dup_count:31;
/*The frame type from pass 1.
A single bit, so only two frame types can be represented.*/
unsigned frame_type:1;
};
/*Rate control state information.
This groups the bit reservoir model (bits_per_frame through max), the rate
model parameters, the frame dropping/capping policy flags, the tracking
filters, and the state needed for 2-pass encoding.
NOTE(review): several arrays of size 2 are presumably indexed by quantizer
type (INTRA or INTER), as th_enc_ctx::log_qavg is; confirm against the rate
control code.*/
struct oc_rc_state{
/*The target average bits per frame.*/
ogg_int64_t bits_per_frame;
/*The current buffer fullness (bits available to be used).*/
ogg_int64_t fullness;
/*The target buffer fullness.
This is where we'd like to be by the last keyframe that appears in the next
buf_delay frames.*/
ogg_int64_t target;
/*The maximum buffer fullness (total size of the buffer).*/
ogg_int64_t max;
/*The log of the number of pixels in a frame in Q57 format.*/
ogg_int64_t log_npixels;
/*The exponent used in the rate model in Q8 format.*/
unsigned exp[2];
/*The number of frames to distribute the buffer usage over.*/
int buf_delay;
/*The total drop count from the previous frame.
This includes duplicates explicitly requested via the
TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/
ogg_uint32_t prev_drop_count;
/*The log of an estimated scale factor used to obtain the real framerate, for
VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/
ogg_int64_t log_drop_scale;
/*The log of estimated scale factor for the rate model in Q57 format.*/
ogg_int64_t log_scale[2];
/*The log of the target quantizer level in Q57 format.*/
ogg_int64_t log_qtarget;
/*Will we drop frames to meet bitrate target?*/
unsigned char drop_frames;
/*Do we respect the maximum buffer fullness?*/
unsigned char cap_overflow;
/*Can the reservoir go negative?*/
unsigned char cap_underflow;
/*Second-order lowpass filters to track scale and VFR.*/
oc_iir_filter scalefilter[2];
/*NOTE(review): inter_count, inter_delay and inter_delay_target are not
documented in the original.
Presumably they count the inter frames seen so far and hold the current and
desired delay of the inter-frame scale filter; confirm against the rate
control code.*/
int inter_count;
int inter_delay;
int inter_delay_target;
oc_iir_filter vfrfilter;
/*Two-pass mode state.
0 => 1-pass encoding.
1 => 1st pass of 2-pass encoding.
2 => 2nd pass of 2-pass encoding.*/
int twopass;
/*Buffer for current frame metrics.*/
unsigned char twopass_buffer[48];
/*The number of bytes in the frame metrics buffer.
When 2-pass encoding is enabled, this is set to 0 after each frame is
submitted, and must be non-zero before the next frame will be accepted.*/
int twopass_buffer_bytes;
/*NOTE(review): undocumented in the original; presumably the number of bytes
of twopass_buffer that have been filled so far.*/
int twopass_buffer_fill;
/*Whether or not to force the next frame to be a keyframe.*/
unsigned char twopass_force_kf;
/*The metrics for the previous frame.*/
oc_frame_metrics prev_metrics;
/*The metrics for the current frame.*/
oc_frame_metrics cur_metrics;
/*The buffered metrics for future frames.*/
oc_frame_metrics *frame_metrics;
/*NOTE(review): nframe_metrics and cframe_metrics are undocumented in the
original; presumably the number of entries in use and the allocated
capacity of frame_metrics, respectively.*/
int nframe_metrics;
int cframe_metrics;
/*The index of the current frame in the circular metric buffer.*/
int frame_metrics_head;
/*The frame count of each type (keyframes, delta frames, and dup frames);
32 bits limits us to 2.268 years at 60 fps.*/
ogg_uint32_t frames_total[3];
/*The number of frames of each type yet to be processed.*/
ogg_uint32_t frames_left[3];
/*The sum of the scale values for each frame type.*/
ogg_int64_t scale_sum[2];
/*The start of the window over which the current scale sums are taken.*/
int scale_window0;
/*The end of the window over which the current scale sums are taken.*/
int scale_window_end;
/*The frame count of each type in the current 2-pass window; this does not
include dup frames.*/
int nframes[3];
/*The total accumulated estimation bias.*/
ogg_int64_t rate_bias;
};
/*Rate control entry points.
Only the declarations are visible here; the notes below describe the
signatures and hedge anything that depends on the implementation.*/
/*Initializes the rate control state _rc for the encoder _enc.*/
void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc);
/*Clears the rate control state _rc.
NOTE(review): presumably releases the frame_metrics buffer; confirm.*/
void oc_rc_state_clear(oc_rc_state *_rc);
/*NOTE(review): presumably recomputes the buffer model after a rate control
parameter of _enc changes; confirm against the implementation.*/
void oc_enc_rc_resize(oc_enc_ctx *_enc);
/*Selects a quantizer index for quantizer type _qti and returns it as an int.
NOTE(review): the meaning of _clamp is not visible here.*/
int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp);
/*NOTE(review): presumably derives the R-D parameter th_enc_ctx::lambda for
the given frame type; confirm.*/
void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type);
/*Updates the rate control state after a frame used _bits bits with quantizer
type _qti and quantizer index _qi.
NOTE(review): the meaning of _trial, _droppable and of the return value is
not visible here.*/
int oc_enc_update_rc_state(oc_enc_ctx *_enc,
long _bits,int _qti,int _qi,int _trial,int _droppable);
/*2-pass rate control data output: hands a buffer back through *_buf.
NOTE(review): presumably returns the number of bytes available; confirm.*/
int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf);
/*2-pass rate control data input: accepts _bytes bytes starting at _buf.
NOTE(review): presumably returns the number of bytes consumed or a negative
error code; confirm.*/
int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes);
/*The internal encoder state.
The shared encoder/decoder state is the first member.
The functions declared in this header take this structure through the name
oc_enc_ctx.
NOTE(review): the typedef relating oc_enc_ctx to th_enc_ctx is not visible in
this part of the file.*/
struct th_enc_ctx{
/*Shared encoder/decoder state.*/
oc_theora_state state;
/*Buffer in which to assemble packets.*/
oggpack_buffer opb;
/*Encoder-specific macroblock information.*/
oc_mb_enc_info *mb_info;
/*DC coefficients after prediction.*/
ogg_int16_t *frag_dc;
/*The list of coded macro blocks, in coded order.*/
unsigned *coded_mbis;
/*The number of coded macro blocks.*/
size_t ncoded_mbis;
/*Whether or not packets are ready to be emitted.
This takes on negative values while there are remaining header packets to
be emitted, reaches 0 when the codec is ready for input, and becomes
positive when a frame has been processed and data packets are ready.*/
int packet_state;
/*The maximum distance between keyframes.*/
ogg_uint32_t keyframe_frequency_force;
/*The number of duplicates to produce for the next frame.*/
ogg_uint32_t dup_count;
/*The number of duplicates remaining to be emitted for the current frame.*/
ogg_uint32_t nqueued_dups;
/*The number of duplicates emitted for the last frame.*/
ogg_uint32_t prev_dup_count;
/*The current speed level.*/
int sp_level;
/*Whether or not VP3 compatibility mode has been enabled.*/
unsigned char vp3_compatible;
/*Whether or not any INTER frames have been coded.*/
unsigned char coded_inter_frame;
/*Whether or not previous frame was dropped.*/
unsigned char prevframe_dropped;
/*Stores most recently chosen Huffman tables for each frame type, DC and AC
coefficients, and luma and chroma tokens.
The actual Huffman table used for a given coefficient depends not only on
the choice made here, but also its index in the zig-zag ordering.*/
unsigned char huff_idxs[2][2][2];
/*Current count of bits used by each MV coding mode.*/
size_t mv_bits[2];
/*The mode scheme chooser for estimating mode coding costs.*/
oc_mode_scheme_chooser chooser;
/*The number of vertical super blocks in an MCU.*/
int mcu_nvsbs;
/*The SSD error for skipping each fragment in the current MCU.*/
unsigned *mcu_skip_ssd;
/*The DCT token lists for each coefficient and each plane.*/
unsigned char **dct_tokens[3];
/*The extra bits associated with each DCT token.*/
ogg_uint16_t **extra_bits[3];
/*The number of DCT tokens for each coefficient for each plane.*/
ptrdiff_t ndct_tokens[3][64];
/*Pending EOB runs for each coefficient for each plane.*/
ogg_uint16_t eob_run[3][64];
/*The offset of the first DCT token for each coefficient for each plane.*/
unsigned char dct_token_offs[3][64];
/*The last DC coefficient for each plane and reference frame.*/
int dc_pred_last[3][3];
/*The two statistics buffers below exist only in builds that define
OC_COLLECT_METRICS, so the layout of this structure depends on that macro.*/
#if defined(OC_COLLECT_METRICS)
/*Fragment SATD statistics for MB mode estimation metrics.*/
unsigned *frag_satd;
/*Fragment SSD statistics for MB mode estimation metrics.*/
unsigned *frag_ssd;
#endif
/*The R-D optimization parameter.*/
int lambda;
/*The huffman tables in use.*/
th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
/*The quantization parameters in use.*/
th_quant_info qinfo;
/*NOTE(review): the two enquant members are undocumented in the original.
Their dimensions are [64][3][2]; presumably qi value, color plane and
quantizer type, with enquant_tables pointing into enquant_table_data.
Confirm against the quantizer setup code.*/
oc_iquant *enquant_tables[64][3][2];
oc_iquant_table enquant_table_data[64][3][2];
/*An "average" quantizer for each quantizer type (INTRA or INTER) and qi
value.
This is used to parameterize the rate control decisions.
They are kept in the log domain to simplify later processing.
Keep in mind these are DCT domain quantizers, and so are scaled by an
additional factor of 4 from the pixel domain.*/
ogg_int64_t log_qavg[2][64];
/*The buffer state used to drive rate control.*/
oc_rc_state rc;
/*Table for encoder acceleration functions.*/
oc_enc_opt_vtable opt_vtable;
};
/*Frame analysis entry points.
NOTE(review): only the declarations are visible here; the meaning of _recode
and of the return value of oc_enc_analyze_inter() must be taken from the
implementation.*/
/*Analyzes the current frame of _enc for intra coding.*/
void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
/*Analyzes the current frame of _enc for inter coding.
_allow_keyframe presumably lets the analysis decide that a keyframe should be
coded instead; confirm.*/
int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
/*The mode metrics helpers are only declared when OC_COLLECT_METRICS is
defined.*/
#if defined(OC_COLLECT_METRICS)
void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc);
#endif
/*Perform fullpel motion search for a single MB against both reference frames.
_mbi is the index of the macro block.*/
void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi);
/*Refine a MB MV for one frame.
_frame selects the reference frame to refine against.*/
void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame);
/*Refine the block MVs.*/
void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi);
/*Used to rollback a tokenlog transaction when we retroactively decide to skip
a fragment.
A checkpoint is taken right before each token is added.
Each checkpoint records where the token went (pli, zzi) and the two values
that existed before it was added (eob_run, ndct_tokens); these correspond to
the eob_run and ndct_tokens arrays of th_enc_ctx.*/
struct oc_token_checkpoint{
/*The color plane the token was added to.*/
unsigned char pli;
/*The zig-zag index the token was added to.*/
unsigned char zzi;
/*The outstanding EOB run count before the token was added.*/
ogg_uint16_t eob_run;
/*The token count before the token was added.*/
ptrdiff_t ndct_tokens;
};
/*Tokenization entry points.
NOTE(review): only the declarations are visible here, so the descriptions
below are limited to what the signatures and parameter names show.*/
/*Begins tokenization for the encoder _enc.*/
void oc_enc_tokenize_start(oc_enc_ctx *_enc);
/*Tokenizes the AC coefficients of fragment _fragi in plane _pli.
_stack points to a cursor into an array of token checkpoints.
NOTE(review): presumably a checkpoint is appended for every token added so
the caller can roll them back, and the return value is a cost estimate;
confirm.*/
int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
int _zzi,oc_token_checkpoint **_stack,int _acmin);
/*Rolls back the _n token checkpoints stored at _stack.*/
void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
const oc_token_checkpoint *_stack,int _n);
/*Performs DC prediction on the fragment rows from _fragy0 up to _frag_yend of
plane _pli.
NOTE(review): whether _frag_yend is inclusive is not visible here.*/
void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
int _pli,int _fragy0,int _frag_yend);
/*Tokenizes the DC coefficients of the _ncoded_fragis fragments listed in
_coded_fragis for plane _pli.*/
void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
int _prev_ndct_tokens1,int _prev_eob_run1);
/*Finishes tokenization for the encoder _enc.*/
void oc_enc_tokenize_finish(oc_enc_ctx *_enc);
/*Utility routine to encode one of the header packets.
The packet to produce is tracked through *_packet_state, and the result is
returned in _op.*/
int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
oggpack_buffer *_opb,const th_quant_info *_qinfo,
const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
const char *_vendor,th_comment *_tc,ogg_packet *_op);
/*Encoder-specific accelerated functions.
Every function here takes the encoder context as its first argument, followed
by the same arguments as the identically named member of the function
pointer table and as the matching *_c() default implementation.
NOTE(review): presumably each one simply dispatches through
_enc->opt_vtable; confirm against the definitions.
Fragments are 8x8, as shown by the 64-entry coefficient arrays; _ystride is
the distance between rows of the pixel buffers.*/
/*Pixel differences between _src and _ref, written to _diff.*/
void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
const unsigned char *_src,const unsigned char *_ref,int _ystride);
/*Like oc_enc_frag_sub(), but without a reference block.
NOTE(review): the name suggests a constant 128 is subtracted; confirm.*/
void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
const unsigned char *_src,int _ystride);
/*Error measures between _src and one or two reference blocks.
The *_thresh variants take an extra _thresh argument.
NOTE(review): presumably an early-termination bound; confirm.*/
unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src,
const unsigned char *_ref,int _ystride);
unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
const unsigned char *_src,const unsigned char *_ref,int _ystride,
unsigned _thresh);
unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
const unsigned char *_src,const unsigned char *_ref1,
const unsigned char *_ref2,int _ystride,unsigned _thresh);
unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
const unsigned char *_src,const unsigned char *_ref,int _ystride,
unsigned _thresh);
unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
const unsigned char *_src,const unsigned char *_ref1,
const unsigned char *_ref2,int _ystride,unsigned _thresh);
unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
const unsigned char *_src,int _ystride);
/*Builds _dst from the two source blocks _src1 and _src2.
NOTE(review): presumably their average, as used for half-pel prediction;
confirm.*/
void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
/*Reconstructs a fragment into _dst from a 64-entry residue, without
(intra) or with (inter) a predictor block _src.*/
void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]);
void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
/*Forward 8x8 DCT of _x, written to _y.*/
void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
const ogg_int16_t _x[64]);
/*Default pure-C implementations.
Apart from oc_enc_vtable_init_c(), these have the same parameter lists as the
corresponding members of the encoder's function pointer table, i.e. the
accelerated wrappers' arguments without the leading encoder context.
NOTE(review): presumably oc_enc_vtable_init_c() installs these functions
into _enc->opt_vtable; confirm against the definition.*/
void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
/*Pixel differences, written to the 64-entry array _diff.*/
void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
const unsigned char *_src,const unsigned char *_ref,int _ystride);
void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
const unsigned char *_src,int _ystride);
/*Builds _dst from the two source blocks _src1 and _src2.*/
void oc_enc_frag_copy2_c(unsigned char *_dst,
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
/*Error measures between _src and one or two reference blocks.*/
unsigned oc_enc_frag_sad_c(const unsigned char *_src,
const unsigned char *_ref,int _ystride);
unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _thresh);
unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _thresh);
unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
/*Forward 8x8 DCT of _x, written to _y.*/
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,800 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: encoder_internal.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#ifndef ENCODER_INTERNAL_H
#define ENCODER_INTERNAL_H
#include <theora/theora.h>
#include "huffman.h"
#ifndef LIBOGG2
#define theora_read(x,y,z) ( *z = oggpackB_read(x,y) )
#else
#define theora_read(x,y,z) ( oggpackB_read(x,y,z) )
#endif
/* Compile-time constants shared by the encoder and the playback code.
   Several of them size arrays in the structures declared further down
   in this header. */
#define CURRENT_ENCODE_VERSION 1
#define HUGE_ERROR (1<<28) /* Out of range test value */
/* Baseline dct height and width. */
#define BLOCK_HEIGHT_WIDTH 8
#define HFRAGPIXELS 8
#define VFRAGPIXELS 8
/* Baseline dct block size (8*8 = 64 coefficients) */
#define BLOCK_SIZE (BLOCK_HEIGHT_WIDTH * BLOCK_HEIGHT_WIDTH)
/* Border is for unrestricted mv's */
#define UMV_BORDER 16
/* Twice the border, i.e. 32 */
#define STRIDE_EXTRA (UMV_BORDER * 2)
/* Number of entries in the per-quality tables (QThreshTable,
   DcScaleFactorTable, QTargetModifier) */
#define Q_TABLE_SIZE 64
#define BASE_FRAME 0
#define NORMAL_FRAME 1
/* The CODING_MODE enumeration has MAX_MODES (8) values, which fit in
   MODE_BITS (3) bits. */
#define MAX_MODES 8
#define MODE_BITS 3
#define MODE_METHODS 8
#define MODE_METHOD_BITS 3
/* Different key frame types/methods */
#define DCT_KEY_FRAME 0
/* Length of the PriorKeyFrameSize and PriorKeyFrameDistance histories
   in CP_INSTANCE */
#define KEY_FRAME_CONTEXT 5
/* Preprocessor defines */
/* Length of the PrevFragments history in PP_INSTANCE */
#define MAX_PREV_FRAMES 16
/* Number of search sites for a 4-step search (at pixel accuracy) */
#define MAX_SEARCH_SITES 33
#define VERY_BEST_Q 10
#define MIN_BPB_FACTOR 0.3
#define MAX_BPB_FACTOR 3.0
#define MAX_MV_EXTENT 31 /* Max search distance in half pixel increments */
/* Compressor rate and quality configuration.
   CP_INSTANCE embeds one of these as its Configuration member. */
typedef struct CONFIG_TYPE2{
double OutputFrameRate;
/* NOTE(review): units (bits or bytes per second) are not visible here. */
ogg_uint32_t TargetBandwidth;
ogg_uint32_t KeyFrameDataTarget ; /* Data rate target for key frames */
ogg_uint32_t FirstFrameQ;
ogg_uint32_t BaseQ;
ogg_uint32_t MaxQ; /* Absolute Max Q allowed. */
ogg_uint32_t ActiveMaxQ; /* Currently active Max Q */
} CONFIG_TYPE2;
/* Node of a singly linked list carrying one integer.
   PB_INSTANCE keeps a pointer to these in its _Nodes member.
   NOTE(review): presumably i is a coefficient or fragment index;
   confirm against the code that walks the list. */
typedef struct coeffNode{
int i;
struct coeffNode *next; /* Next node in the list */
} COEFFNODE;
/* Scan configuration: the buffers and frame dimensions handed to the
   pre-processor.  Both PP_INSTANCE and CP_INSTANCE embed one of these
   as their ScanConfig member.
   NOTE(review): Yuv0ptr/Yuv1ptr are presumably the two source frame
   buffers being compared; confirm against the scan code. */
typedef struct{
unsigned char * Yuv0ptr;
unsigned char * Yuv1ptr;
unsigned char * SrfWorkSpcPtr;
unsigned char * disp_fragments;
ogg_uint32_t * RegionIndex; /* Gives pixel index for top left of
each block */
ogg_uint32_t VideoFrameHeight;
ogg_uint32_t VideoFrameWidth;
} SCAN_CONFIG_DATA;
/* One sample of a YUV frame buffer. */
typedef unsigned char YUV_BUFFER_ENTRY;
/* A two-component motion vector with signed 32-bit components. */
typedef struct{
ogg_int32_t x;
ogg_int32_t y;
} MOTION_VECTOR;
/* Coordinates share the layout of a motion vector. */
typedef MOTION_VECTOR COORDINATE;
/* Element type of the quantiser tables and coefficient lists. */
typedef ogg_int16_t Q_LIST_ENTRY;
/* Pre-processor instance.
   Holds the working buffers, plane pointers and dimensions, scan control
   variables and thresholds used by the pre-processor.  CP_INSTANCE
   embeds one of these as its pp member, and the functions ConfigurePP(),
   YUVAnalyseFrame(), InitPPInstance() and ClearPPInstance() operate
   on it.
   NOTE(review): most members are undocumented in the original; only
   comments that are evident from the declarations have been added. */
typedef struct PP_INSTANCE {
ogg_uint32_t PrevFrameLimit;
ogg_uint32_t *ScanPixelIndexTable;
signed char *ScanDisplayFragments;
/* History of fragment maps, MAX_PREV_FRAMES entries deep. */
signed char *PrevFragments[MAX_PREV_FRAMES];
ogg_uint32_t *FragScores; /* The individual frame difference ratings. */
signed char *SameGreyDirPixels;
signed char *BarBlockMap;
/* Number of pixels changed by diff threshold in row of a fragment. */
unsigned char *FragDiffPixels;
unsigned char *PixelScores;
unsigned char *PixelChangedMap;
unsigned char *ChLocals;
ogg_int16_t *yuv_differences;
ogg_int32_t *RowChangedPixels;
signed char *TmpCodedMap;
/* Plane pointers and dimension variables */
unsigned char * YPlanePtr0;
unsigned char * YPlanePtr1;
unsigned char * UPlanePtr0;
unsigned char * UPlanePtr1;
unsigned char * VPlanePtr0;
unsigned char * VPlanePtr1;
ogg_uint32_t VideoYPlaneWidth;
ogg_uint32_t VideoYPlaneHeight;
ogg_uint32_t VideoUVPlaneWidth;
ogg_uint32_t VideoUVPlaneHeight;
ogg_uint32_t VideoYPlaneStride;
ogg_uint32_t VideoUPlaneStride;
ogg_uint32_t VideoVPlaneStride;
/* Scan control variables. */
unsigned char HFragPixels;
unsigned char VFragPixels;
ogg_uint32_t ScanFrameFragments;
ogg_uint32_t ScanYPlaneFragments;
ogg_uint32_t ScanUVPlaneFragments;
ogg_uint32_t ScanHFragments;
ogg_uint32_t ScanVFragments;
ogg_uint32_t YFramePixels;
ogg_uint32_t UVFramePixels;
ogg_uint32_t SgcThresh;
ogg_uint32_t OutputBlocksUpdated;
ogg_uint32_t KFIndicator;
/* The pre-processor scan configuration. */
SCAN_CONFIG_DATA ScanConfig;
ogg_int32_t SRFGreyThresh;
ogg_int32_t SRFColThresh;
ogg_int32_t SgcLevelThresh;
ogg_int32_t SuvcLevelThresh;
ogg_uint32_t NoiseSupLevel;
/* Block Thresholds. */
ogg_uint32_t PrimaryBlockThreshold;
unsigned char LineSearchTripTresh;
int PAKEnabled;
int LevelThresh;
int NegLevelThresh;
int SrfThresh;
int NegSrfThresh;
int HighChange;
int NegHighChange;
/* Threshold lookup tables (512 entries each) */
unsigned char SrfPakThreshTable[512];
unsigned char SrfThreshTable[512];
unsigned char SgcThreshTable[512];
/* Variables controlling S.A.D. break outs. */
ogg_uint32_t GrpLowSadThresh;
ogg_uint32_t GrpHighSadThresh;
ogg_uint32_t ModifiedGrpLowSadThresh;
ogg_uint32_t ModifiedGrpHighSadThresh;
/* NOTE(review): the Plane* members are presumably the dimensions of the
   plane currently being scanned; confirm against the scan code. */
ogg_int32_t PlaneHFragments;
ogg_int32_t PlaneVFragments;
ogg_int32_t PlaneHeight;
ogg_int32_t PlaneWidth;
ogg_int32_t PlaneStride;
ogg_uint32_t BlockThreshold;
ogg_uint32_t BlockSgcThresh;
double UVBlockThreshCorrection;
double UVSgcCorrection;
double YUVPlaneCorrectionFactor;
/* 256-entry lookup tables. */
double AbsDiff_ScoreMultiplierTable[256];
unsigned char NoiseScoreBoostTable[256];
unsigned char MaxLineSearchLen;
/* Sizes of the circular working buffers. */
ogg_int32_t YuvDiffsCircularBufferSize;
ogg_int32_t ChLocalsCircularBufferSize;
ogg_int32_t PixelMapCircularBufferSize;
} PP_INSTANCE;
/* Coding modes.
   The eight values 0x0..0x7 match MAX_MODES (8) and fit in MODE_BITS (3)
   bits.  PB_INSTANCE stores one per fragment in FragCodingMethod. */
typedef enum{
CODE_INTER_NO_MV = 0x0, /* INTER prediction, (0,0) motion
vector implied. */
CODE_INTRA = 0x1, /* INTRA i.e. no prediction. */
CODE_INTER_PLUS_MV = 0x2, /* INTER prediction, non zero motion
vector. */
CODE_INTER_LAST_MV = 0x3, /* Use Last Motion vector */
CODE_INTER_PRIOR_LAST = 0x4, /* Prior last motion vector */
CODE_USING_GOLDEN = 0x5, /* 'Golden frame' prediction (no MV). */
CODE_GOLDEN_MV = 0x6, /* 'Golden frame' prediction plus MV. */
CODE_INTER_FOURMV = 0x7 /* Inter prediction 4MV per macro block. */
} CODING_MODE;
/* Node of a Huffman tree.
   Each node links to the subtrees for a 0 bit and a 1 bit and is also
   chained to its neighbors through Previous/Next.
   NOTE(review): presumably the Previous/Next chain is the frequency
   ordered list used while the tree is built; confirm against the
   Huffman code. */
typedef struct HUFF_ENTRY {
struct HUFF_ENTRY *ZeroChild; /* Subtree reached by a 0 bit */
struct HUFF_ENTRY *OneChild; /* Subtree reached by a 1 bit */
struct HUFF_ENTRY *Previous;
struct HUFF_ENTRY *Next;
ogg_int32_t Value;
ogg_uint32_t Frequency;
} HUFF_ENTRY;
/* Codec setup data: the quantiser tables and the Huffman tree roots.
   PB_INSTANCE carries members with the same names and types for the
   quantiser tables.
   NUM_HUFF_TABLES is not defined in the visible part of this header.
   NOTE(review): presumably it comes from huffman.h, which is included
   above; confirm. */
typedef struct codec_setup_info {
ogg_uint32_t QThreshTable[Q_TABLE_SIZE];
Q_LIST_ENTRY DcScaleFactorTable[Q_TABLE_SIZE];
/* 64-entry (one per coefficient of an 8x8 block) base matrices. */
Q_LIST_ENTRY Y_coeffs[64];
Q_LIST_ENTRY UV_coeffs[64];
Q_LIST_ENTRY Inter_coeffs[64];
HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES];
} codec_setup_info;
/* Playback (decoder side) instance.
   Holds the bit-packer, stream info, frame size and index information,
   reconstruction frame buffers, per-fragment state, block maps, and the
   quantiser/dequantiser and Huffman tables.  CP_INSTANCE embeds one of
   these as its pb member, so the encoder shares this state for
   reconstruction.
   NOTE(review): many members are undocumented in the original; only
   comments that are evident from the declarations have been added. */
typedef struct PB_INSTANCE {
oggpack_buffer *opb;
theora_info info;
/* how far do we shift the granulepos to separate out P frame counts? */
int keyframe_granule_shift;
/***********************************************************************/
/* Decoder and Frame Type Information */
int DecoderErrorCode;
int FramesHaveBeenSkipped;
int PostProcessEnabled;
ogg_uint32_t PostProcessingLevel; /* Perform post processing */
/* Frame Info */
CODING_MODE CodingMode;
unsigned char FrameType;
unsigned char KeyFrameType;
ogg_uint32_t QualitySetting;
ogg_uint32_t FrameQIndex; /* Quality specified as a
table index */
ogg_uint32_t ThisFrameQualityValue; /* Quality value for this frame */
ogg_uint32_t LastFrameQualityValue; /* Last Frame's Quality */
/* NOTE(review): the original comments on the next two members look
   swapped relative to the member names; confirm against the code that
   uses them before relying on either description. */
ogg_int32_t CodedBlockIndex; /* Number of Coded Blocks */
ogg_uint32_t CodedBlocksThisFrame; /* Index into coded blocks */
ogg_uint32_t FrameSize; /* The number of bytes in the frame. */
/**********************************************************************/
/* Frame Size & Index Information */
ogg_uint32_t YPlaneSize;
ogg_uint32_t UVPlaneSize;
ogg_uint32_t YStride;
ogg_uint32_t UVStride;
ogg_uint32_t VFragments;
ogg_uint32_t HFragments;
ogg_uint32_t UnitFragments;
ogg_uint32_t YPlaneFragments;
ogg_uint32_t UVPlaneFragments;
ogg_uint32_t ReconYPlaneSize;
ogg_uint32_t ReconUVPlaneSize;
ogg_uint32_t YDataOffset;
ogg_uint32_t UDataOffset;
ogg_uint32_t VDataOffset;
ogg_uint32_t ReconYDataOffset;
ogg_uint32_t ReconUDataOffset;
ogg_uint32_t ReconVDataOffset;
ogg_uint32_t YSuperBlocks; /* Number of SuperBlocks in a Y frame */
ogg_uint32_t UVSuperBlocks; /* Number of SuperBlocks in a U or V frame */
ogg_uint32_t SuperBlocks; /* Total number of SuperBlocks in a
Y,U,V frame */
ogg_uint32_t YSBRows; /* Number of rows of SuperBlocks in a
Y frame */
ogg_uint32_t YSBCols; /* Number of cols of SuperBlocks in a
Y frame */
ogg_uint32_t UVSBRows; /* Number of rows of SuperBlocks in a
U or V frame */
ogg_uint32_t UVSBCols; /* Number of cols of SuperBlocks in a
U or V frame */
ogg_uint32_t YMacroBlocks; /* Number of Macro-Blocks in Y component */
ogg_uint32_t UVMacroBlocks; /* Number of Macro-Blocks in U/V component */
ogg_uint32_t MacroBlocks; /* Total number of Macro-Blocks */
/**********************************************************************/
/* Frames */
YUV_BUFFER_ENTRY *ThisFrameRecon;
YUV_BUFFER_ENTRY *GoldenFrame;
YUV_BUFFER_ENTRY *LastFrameRecon;
YUV_BUFFER_ENTRY *PostProcessBuffer;
/**********************************************************************/
/* Fragment Information */
ogg_uint32_t *pixel_index_table; /* start address of first
pixel of fragment in
source */
ogg_uint32_t *recon_pixel_index_table; /* start address of first
pixel in recon buffer */
unsigned char *display_fragments; /* Fragment update map */
unsigned char *skipped_display_fragments;/* whether fragment YUV
Conversion and update is to be
skipped */
ogg_int32_t *CodedBlockList; /* A list of fragment indices for
coded blocks. */
MOTION_VECTOR *FragMVect; /* fragment motion vectors */
ogg_uint32_t *FragTokenCounts; /* Number of tokens per fragment */
ogg_uint32_t (*TokenList)[128]; /* Fragment Token Pointers */
ogg_int32_t *FragmentVariances;
ogg_uint32_t *FragQIndex; /* Fragment Quality used in
PostProcess */
Q_LIST_ENTRY (*PPCoefBuffer)[64]; /* PostProcess Buffer for
coefficients data */
unsigned char *FragCoeffs; /* # of coeffs decoded so far for
fragment */
unsigned char *FragCoefEOB; /* Position of last non 0 coef
within QFragData */
Q_LIST_ENTRY (*QFragData)[64]; /* Fragment Coefficients
Array Pointers */
CODING_MODE *FragCodingMethod; /* coding method for the
fragment */
/***********************************************************************/
/* pointers to addresses used for allocation and deallocation the
others are rounded up to the nearest 32 bytes */
COEFFNODE *_Nodes;
ogg_uint32_t *transIndex; /* ptr to table of
transposed indexes */
/***********************************************************************/
ogg_int32_t bumpLast;
/* Macro Block and SuperBlock Information */
ogg_int32_t (*BlockMap)[4][4]; /* super block + sub macro
block + sub frag ->
FragIndex */
/* Coded flag arrays and counters for them */
unsigned char *SBCodedFlags;
unsigned char *SBFullyFlags;
unsigned char *MBCodedFlags;
unsigned char *MBFullyFlags;
/**********************************************************************/
ogg_uint32_t EOB_Run;
COORDINATE *FragCoordinates;
MOTION_VECTOR MVector;
ogg_int32_t ReconPtr2Offset; /* Offset for second reconstruction
in half pixel MC */
Q_LIST_ENTRY *quantized_list;
ogg_int16_t *ReconDataBuffer;
Q_LIST_ENTRY InvLastIntraDC;
Q_LIST_ENTRY InvLastInterDC;
Q_LIST_ENTRY LastIntraDC;
Q_LIST_ENTRY LastInterDC;
ogg_uint32_t BlocksToDecode; /* Blocks to be decoded this frame */
ogg_uint32_t DcHuffChoice; /* Huffman table selection variables */
unsigned char ACHuffChoice;
ogg_uint32_t QuadMBListIndex;
ogg_int32_t ByteCount;
ogg_uint32_t bit_pattern;
unsigned char bits_so_far;
unsigned char NextBit;
ogg_int32_t BitsLeft;
ogg_int16_t *DequantBuffer;
ogg_int32_t fp_quant_InterUV_coeffs[64];
ogg_int32_t fp_quant_InterUV_round[64];
ogg_int32_t fp_ZeroBinSize_InterUV[64];
ogg_int16_t *TmpReconBuffer;
ogg_int16_t *TmpDataBuffer;
/* Loop filter bounding values */
ogg_int32_t FiltBoundingValue[512];
/* Dequantiser and rounding tables */
/* The next five members mirror the members of codec_setup_info. */
ogg_uint32_t QThreshTable[Q_TABLE_SIZE];
Q_LIST_ENTRY DcScaleFactorTable[Q_TABLE_SIZE];
Q_LIST_ENTRY Y_coeffs[64];
Q_LIST_ENTRY UV_coeffs[64];
Q_LIST_ENTRY Inter_coeffs[64];
Q_LIST_ENTRY *dequant_InterUV_coeffs;
unsigned int quant_index[64];
ogg_int32_t quant_Y_coeffs[64];
ogg_int32_t quant_UV_coeffs[64];
ogg_int32_t fp_quant_Y_coeffs[64]; /* used in reiniting quantizers */
HUFF_ENTRY *HuffRoot_VP3x[NUM_HUFF_TABLES];
ogg_uint32_t *HuffCodeArray_VP3x[NUM_HUFF_TABLES];
unsigned char *HuffCodeLengthArray_VP3x[NUM_HUFF_TABLES];
unsigned char *ExtraBitLengths_VP3x;
/* Quantiser and rounding tables */
ogg_int32_t fp_quant_UV_coeffs[64];
ogg_int32_t fp_quant_Inter_coeffs[64];
ogg_int32_t fp_quant_Y_round[64];
ogg_int32_t fp_quant_UV_round[64];
ogg_int32_t fp_quant_Inter_round[64];
ogg_int32_t fp_ZeroBinSize_Y[64];
ogg_int32_t fp_ZeroBinSize_UV[64];
ogg_int32_t fp_ZeroBinSize_Inter[64];
/* NOTE(review): the fquant_* and dequant_coeffs pointers are presumably
   set by the select_*_quantiser() functions to the tables for the plane
   and mode in use; confirm. */
ogg_int32_t *fquant_coeffs;
ogg_int32_t *fquant_round;
ogg_int32_t *fquant_ZbSize;
Q_LIST_ENTRY *dequant_Y_coeffs;
Q_LIST_ENTRY *dequant_UV_coeffs;
Q_LIST_ENTRY *dequant_Inter_coeffs;
Q_LIST_ENTRY *dequant_coeffs;
/* Predictor used in choosing entropy table for decoding block patterns. */
unsigned char BlockPatternPredictor;
short Modifier[4][512];
short *ModifierPointer[4];
unsigned char *DataOutputInPtr;
} PB_INSTANCE;
/* Compressor (encoder) instance.
   Holds the compressor configuration, running statistics, rate
   targeting state, token buffers, per-fragment bookkeeping and motion
   search tables, and embeds the pre-processor (pp) and playback (pb)
   instances.
   The oggbufferstate member exists only when LIBOGG2 is defined, so
   the layout of this structure depends on that macro.
   NOTE(review): many members are undocumented in the original; only
   comments that are evident from the declarations have been added. */
typedef struct CP_INSTANCE {
/* Compressor Configuration */
SCAN_CONFIG_DATA ScanConfig;
CONFIG_TYPE2 Configuration;
int GoldenFrameEnabled;
int InterPrediction;
int MotionCompensation;
ogg_uint32_t LastKeyFrame ;
ogg_int32_t DropCount ;
ogg_int32_t MaxConsDroppedFrames ;
ogg_int32_t DropFrameTriggerBytes;
int DropFrameCandidate;
/* Compressor Statistics */
double TotErrScore;
ogg_int64_t KeyFrameCount; /* Count of key frames. */
ogg_int64_t TotKeyFrameBytes;
ogg_uint32_t LastKeyFrameSize;
/* Histories of the last KEY_FRAME_CONTEXT key frames. */
ogg_uint32_t PriorKeyFrameSize[KEY_FRAME_CONTEXT];
ogg_uint32_t PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
ogg_int32_t FrameQuality[6];
int DecoderErrorCode; /* Decoder error flag. */
ogg_int32_t ThreshMapThreshold;
ogg_int32_t TotalMotionScore;
ogg_int64_t TotalByteCount;
ogg_int32_t FixedQ;
/* Frame Statistics */
signed char InterCodeCount;
ogg_int64_t CurrentFrame;
ogg_int64_t CarryOver ;
ogg_uint32_t LastFrameSize;
ogg_uint32_t FrameBitCount;
int ThisIsFirstFrame;
int ThisIsKeyFrame;
ogg_int32_t MotionScore;
ogg_uint32_t RegulationBlocks;
ogg_int32_t RecoveryMotionScore;
int RecoveryBlocksAdded ;
double ProportionRecBlocks;
double MaxRecFactor ;
/* Rate Targeting variables. */
ogg_uint32_t ThisFrameTargetBytes;
double BpbCorrectionFactor;
/* Up regulation variables */
ogg_uint32_t FinalPassLastPos; /* Used to regulate a final
unrestricted high quality
pass. */
ogg_uint32_t LastEndSB; /* Where we were in the loop
last time. */
ogg_uint32_t ResidueLastEndSB; /* Where we were in the residue
update loop last time. */
/* Controlling Block Selection */
ogg_uint32_t MVChangeFactor;
ogg_uint32_t FourMvChangeFactor;
ogg_uint32_t MinImprovementForNewMV;
ogg_uint32_t ExhaustiveSearchThresh;
ogg_uint32_t MinImprovementForFourMV;
ogg_uint32_t FourMVThreshold;
/* Module shared data structures. */
ogg_int32_t frame_target_rate;
ogg_int32_t BaseLineFrameTargetRate;
ogg_int32_t min_blocks_per_frame;
ogg_uint32_t tot_bytes_old;
/*********************************************************************/
/* Frames Used in the selective convolution filtering of the Y plane. */
unsigned char *ConvDestBuffer;
YUV_BUFFER_ENTRY *yuv0ptr;
YUV_BUFFER_ENTRY *yuv1ptr;
/*********************************************************************/
/*********************************************************************/
/* Token Buffers */
ogg_uint32_t *OptimisedTokenListEb; /* Optimised token list extra bits */
unsigned char *OptimisedTokenList; /* Optimised token list. */
unsigned char *OptimisedTokenListHi; /* Optimised token list huffman
table index */
unsigned char *OptimisedTokenListPl; /* Plane to which the token
belongs Y = 0 or UV = 1 */
ogg_int32_t OptimisedTokenCount; /* Count of Optimized tokens */
ogg_uint32_t RunHuffIndex; /* Huffman table in force at
the start of a run */
ogg_uint32_t RunPlaneIndex; /* The plane (Y=0 UV=1) to
which the first token in
an EOB run belonged. */
ogg_uint32_t TotTokenCount;
ogg_int32_t TokensToBeCoded;
ogg_int32_t TokensCoded;
/********************************************************************/
/* SuperBlock, MacroBLock and Fragment Information */
/* Coded flag arrays and counters for them */
unsigned char *PartiallyCodedFlags;
unsigned char *PartiallyCodedMbPatterns;
unsigned char *UncodedMbFlags;
unsigned char *extra_fragments; /* extra updates not
recommended by pre-processor */
ogg_int16_t *OriginalDC;
ogg_uint32_t *FragmentLastQ; /* Array used to keep track of
quality at which each
fragment was last
updated. */
unsigned char *FragTokens;
ogg_uint32_t *FragTokenCounts; /* Number of tokens per fragment */
ogg_uint32_t *RunHuffIndices;
ogg_uint32_t *LastCodedErrorScore;
ogg_uint32_t *ModeList;
MOTION_VECTOR *MVList;
unsigned char *BlockCodedFlags;
ogg_uint32_t MvListCount;
ogg_uint32_t ModeListCount;
unsigned char *DataOutputBuffer;
/*********************************************************************/
ogg_uint32_t RunLength;
ogg_uint32_t MaxBitTarget; /* Cut off target for rate capping */
double BitRateCapFactor; /* Factor relating normal frame target
to cut off target. */
unsigned char MBCodingMode; /* Coding mode flags */
/* Motion search tables, one entry per search site. */
ogg_int32_t MVPixelOffsetY[MAX_SEARCH_SITES];
ogg_uint32_t InterTripOutThresh;
unsigned char MVEnabled;
ogg_uint32_t MotionVectorSearchCount;
ogg_uint32_t FrameMVSearcOunt;
ogg_int32_t MVSearchSteps;
ogg_int32_t MVOffsetX[MAX_SEARCH_SITES];
ogg_int32_t MVOffsetY[MAX_SEARCH_SITES];
ogg_int32_t HalfPixelRef2Offset[9]; /* Offsets for half pixel
compensation */
signed char HalfPixelXOffset[9]; /* Half pixel MV offsets for X */
signed char HalfPixelYOffset[9]; /* Half pixel MV offsets for Y */
ogg_uint32_t bit_pattern ;
unsigned char bits_so_far ;
ogg_uint32_t lastval ;
ogg_uint32_t lastrun ;
Q_LIST_ENTRY *quantized_list;
MOTION_VECTOR MVector;
ogg_uint32_t TempBitCount;
ogg_int16_t *DCT_codes; /* Buffer that stores the result of
Forward DCT */
ogg_int16_t *DCTDataBuffer; /* Input data buffer for Forward DCT */
/* Motion compensation related variables */
ogg_uint32_t MvMaxExtent;
/* One modifier per quality table entry. */
double QTargetModifier[Q_TABLE_SIZE];
/* instances (used for reconstructing buffers and to hold tokens etc.) */
PP_INSTANCE pp; /* preprocessor */
PB_INSTANCE pb; /* playback */
/* ogg bitpacker for use in packet coding, other API state */
oggpack_buffer *oggbuffer;
#ifdef LIBOGG2 /* Remember, this is just until we drop libogg1 */
ogg_buffer_state *oggbufferstate;
#endif
/* NOTE(review): the three flags below are undocumented; presumably they
   track whether the encoder is initialised, has a packet pending, and
   has finished the stream.  Confirm against the API code. */
int readyflag;
int packetflag;
int doneflag;
} CP_INSTANCE;
/* Branch-free saturation of an integer expression to the range 0..255.
   -((x)>=0) is all-ones for non-negative x and zero for negative x, so
   negative inputs are masked to 0.
   -((x)>255) is all-ones when x exceeds 255, which forces the low byte to
   0xFF before the final cast.
   The argument is expanded three times, so it must not have side effects. */
#define clamp255(x) ((unsigned char)(-((x)>=0) & ((x) | -((x)>255))))
extern void ConfigurePP( PP_INSTANCE *ppi, int Level ) ;
extern ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi,
ogg_uint32_t * KFIndicator );
extern void ClearPPInstance(PP_INSTANCE *ppi);
extern void InitPPInstance(PP_INSTANCE *ppi);
extern int GetFrameType(PB_INSTANCE *pbi);
extern void InitPBInstance(PB_INSTANCE *pbi);
extern void ClearPBInstance(PB_INSTANCE *pbi);
extern void IDctSlow( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ) ;
extern void IDct10( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData );
extern void IDct1( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData );
extern void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr,
ogg_int16_t * ChangePtr, ogg_uint32_t LineStep );
extern void ReconInter( PB_INSTANCE *pbi, unsigned char * ReconPtr,
unsigned char * RefPtr, ogg_int16_t * ChangePtr,
ogg_uint32_t LineStep ) ;
extern void ReconInterHalfPixel2( PB_INSTANCE *pbi, unsigned char * ReconPtr,
unsigned char * RefPtr1,
unsigned char * RefPtr2,
ogg_int16_t * ChangePtr,
ogg_uint32_t LineStep ) ;
extern void SetupLoopFilter(PB_INSTANCE *pbi);
extern void CopyBlock(unsigned char *src,
unsigned char *dest,
unsigned int srcstride);
extern void LoopFilter(PB_INSTANCE *pbi);
extern void ReconRefFrames (PB_INSTANCE *pbi);
extern void ExpandToken( Q_LIST_ENTRY * ExpandedBlock,
unsigned char * CoeffIndex, ogg_uint32_t Token,
ogg_int32_t ExtraBits );
extern void ClearDownQFragData(PB_INSTANCE *pbi);
extern void select_Y_quantiser ( PB_INSTANCE *pbi );
extern void select_Inter_quantiser ( PB_INSTANCE *pbi );
extern void select_UV_quantiser ( PB_INSTANCE *pbi );
extern void select_InterUV_quantiser ( PB_INSTANCE *pbi );
extern void quantize( PB_INSTANCE *pbi,
ogg_int16_t * DCT_block,
Q_LIST_ENTRY * quantized_list);
extern void UpdateQ( PB_INSTANCE *pbi, ogg_uint32_t NewQ );
extern void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ );
extern void fdct_short ( ogg_int16_t * InputData, ogg_int16_t * OutputData );
extern ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi,
ogg_int32_t FragIndex);
extern void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex,
ogg_uint32_t PixelsPerLine ) ;
extern void ClearFragmentInfo(PB_INSTANCE * pbi);
extern void InitFragmentInfo(PB_INSTANCE * pbi);
extern void ClearFrameInfo(PB_INSTANCE * pbi);
extern void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize);
extern void InitializeFragCoordinates(PB_INSTANCE *pbi);
extern void InitFrameDetails(PB_INSTANCE *pbi);
extern void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer *opb);
extern int ReadQTables(codec_setup_info *ci,oggpack_buffer* opb);
extern void CopyQTables(PB_INSTANCE *pbi, codec_setup_info *ci);
extern void InitQTables( PB_INSTANCE *pbi );
extern void InitHuffmanSet( PB_INSTANCE *pbi );
extern void ClearHuffmanSet( PB_INSTANCE *pbi );
extern int ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb);
extern void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
oggpack_buffer *opb);
extern void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci);
extern void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]);
extern void QuadDecodeDisplayFragments ( PB_INSTANCE *pbi );
extern void PackAndWriteDFArray( CP_INSTANCE *cpi );
extern void UpdateFragQIndex(PB_INSTANCE *pbi);
extern void PostProcess(PB_INSTANCE *pbi);
extern void InitMotionCompensation ( CP_INSTANCE *cpi );
extern ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine ) ;
extern ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi,
unsigned char * SrcPtr,
unsigned char * RefPtr,
ogg_uint32_t FragIndex,
ogg_int32_t LastXMV,
ogg_int32_t LastYMV,
ogg_uint32_t PixelsPerLine ) ;
extern void WriteFrameHeader( CP_INSTANCE *cpi) ;
extern ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi,
unsigned char * RefFramePtr,
ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine,
ogg_int32_t *MVPixelOffset,
MOTION_VECTOR *MV );
extern ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi,
unsigned char * RefFramePtr,
ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine,
MOTION_VECTOR *MV );
extern ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi,
unsigned char * RefFramePtr,
ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine,
MOTION_VECTOR *MV ) ;
extern ogg_uint32_t EncodeData(CP_INSTANCE *cpi);
extern ogg_uint32_t PickIntra( CP_INSTANCE *cpi,
ogg_uint32_t SBRows,
ogg_uint32_t SBCols);
extern ogg_uint32_t PickModes(CP_INSTANCE *cpi,
ogg_uint32_t SBRows,
ogg_uint32_t SBCols,
ogg_uint32_t PixelsPerLine,
ogg_uint32_t *InterError,
ogg_uint32_t *IntraError);
extern CODING_MODE FrArrayUnpackMode(PB_INSTANCE *pbi);
extern void CreateBlockMapping ( ogg_int32_t (*BlockMap)[4][4],
ogg_uint32_t YSuperBlocks,
ogg_uint32_t UVSuperBlocks,
ogg_uint32_t HFrags, ogg_uint32_t VFrags );
extern void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
ogg_int32_t RecoveryBlocks ) ;
extern void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore );
extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
extern void UpdateUMVBorder( PB_INSTANCE *pbi,
unsigned char * DestReconPtr );
extern void PInitFrameInfo(PP_INSTANCE * ppi);
extern int GetFrameType(PB_INSTANCE *pbi);
extern void SetFrameType( PB_INSTANCE *pbi,unsigned char FrType );
extern double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ );
extern void ClearTmpBuffers(PB_INSTANCE * pbi);
extern void InitTmpBuffers(PB_INSTANCE * pbi);
extern void ScanYUVInit( PP_INSTANCE * ppi,
SCAN_CONFIG_DATA * ScanConfigPtr);
extern int LoadAndDecode(PB_INSTANCE *pbi);
#endif

View File

@ -1,120 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: simple static lookups for VP3 frame encoder
last mod: $Id: encoder_lookup.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* Variable-length code patterns, one per entry, for a table of
   (MAX_MV_EXTENT*2)+1 values.
   NOTE(review): presumably indexed by (motion vector component +
   MAX_MV_EXTENT), with MvBits[] giving the length of each code -- confirm
   against the encoder that consumes these tables. */
ogg_uint32_t MvPattern[(MAX_MV_EXTENT * 2) + 1] = {
0x000000ff, 0x000000fd, 0x000000fb, 0x000000f9,
0x000000f7, 0x000000f5, 0x000000f3, 0x000000f1,
0x000000ef, 0x000000ed, 0x000000eb, 0x000000e9,
0x000000e7, 0x000000e5, 0x000000e3, 0x000000e1,
0x0000006f, 0x0000006d, 0x0000006b, 0x00000069,
0x00000067, 0x00000065, 0x00000063, 0x00000061,
0x0000002f, 0x0000002d, 0x0000002b, 0x00000029,
0x00000009, 0x00000007, 0x00000002, 0x00000000,
0x00000001, 0x00000006, 0x00000008, 0x00000028,
0x0000002a, 0x0000002c, 0x0000002e, 0x00000060,
0x00000062, 0x00000064, 0x00000066, 0x00000068,
0x0000006a, 0x0000006c, 0x0000006e, 0x000000e0,
0x000000e2, 0x000000e4, 0x000000e6, 0x000000e8,
0x000000ea, 0x000000ec, 0x000000ee, 0x000000f0,
0x000000f2, 0x000000f4, 0x000000f6, 0x000000f8,
0x000000fa, 0x000000fc, 0x000000fe,
};
/* Bit counts (3 to 8) for a table of (MAX_MV_EXTENT*2)+1 entries; the
   shortest codes sit at the centre of the table.
   NOTE(review): presumably the code lengths that pair with MvPattern[] --
   confirm against the encoder. */
ogg_uint32_t MvBits[(MAX_MV_EXTENT * 2) + 1] = {
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
7, 7, 7, 7, 7, 7, 7, 7,
6, 6, 6, 6, 4, 4, 3, 3,
3, 4, 4, 6, 6, 6, 6, 7,
7, 7, 7, 7, 7, 7, 7, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8,
};
/* Second set of code patterns for the same (MAX_MV_EXTENT*2)+1 entries; every
   value fits in 6 bits.
   NOTE(review): presumably the fixed-length alternative to MvPattern[], used
   together with MvBits2[] -- confirm against the encoder. */
ogg_uint32_t MvPattern2[(MAX_MV_EXTENT * 2) + 1] = {
0x0000003f, 0x0000003d, 0x0000003b, 0x00000039,
0x00000037, 0x00000035, 0x00000033, 0x00000031,
0x0000002f, 0x0000002d, 0x0000002b, 0x00000029,
0x00000027, 0x00000025, 0x00000023, 0x00000021,
0x0000001f, 0x0000001d, 0x0000001b, 0x00000019,
0x00000017, 0x00000015, 0x00000013, 0x00000011,
0x0000000f, 0x0000000d, 0x0000000b, 0x00000009,
0x00000007, 0x00000005, 0x00000003, 0x00000000,
0x00000002, 0x00000004, 0x00000006, 0x00000008,
0x0000000a, 0x0000000c, 0x0000000e, 0x00000010,
0x00000012, 0x00000014, 0x00000016, 0x00000018,
0x0000001a, 0x0000001c, 0x0000001e, 0x00000020,
0x00000022, 0x00000024, 0x00000026, 0x00000028,
0x0000002a, 0x0000002c, 0x0000002e, 0x00000030,
0x00000032, 0x00000034, 0x00000036, 0x00000038,
0x0000003a, 0x0000003c, 0x0000003e,
};
/* Bit counts for the (MAX_MV_EXTENT*2)+1 entries of the second scheme: every
   entry is 6.
   NOTE(review): presumably the code lengths that pair with MvPattern2[] --
   confirm against the encoder. */
ogg_uint32_t MvBits2[(MAX_MV_EXTENT * 2) + 1] = {
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6,
};
/* Code patterns for MAX_MODES entries: 0, 10, 110, ... 1111110, 1111111 in
   binary when read with the lengths in ModeBitLengths[].
   NOTE(review): presumably indexed by the rank of a coding mode within the
   selected mode scheme -- confirm against the caller. */
ogg_uint32_t ModeBitPatterns[MAX_MODES] = {
0x00, 0x02, 0x06, 0x0E, 0x1E, 0x3E, 0x7E, 0x7F };
/* Number of bits in each ModeBitPatterns[] entry; the last two codes share
   the maximum length of 7. */
ogg_int32_t ModeBitLengths[MAX_MODES] = {
1, 2, 3, 4, 5, 6, 7, 7 };
/* MODE_METHODS-2 predefined orderings over the MAX_MODES coding modes; each
   row is a permutation of 0..7.
   NOTE(review): presumably each row maps a coding mode to its rank (and so to
   its code in ModeBitPatterns[]); the letter strings on the right appear to
   list the modes from most to least frequent -- confirm against the mode
   packing code. */
unsigned char ModeSchemes[MODE_METHODS-2][MAX_MODES] = {
/* Last Mv dominates */
{ 3, 4, 2, 0, 1, 5, 6, 7 }, /* L P M N I G GM 4 */
{ 2, 4, 3, 0, 1, 5, 6, 7 }, /* L P N M I G GM 4 */
{ 3, 4, 1, 0, 2, 5, 6, 7 }, /* L M P N I G GM 4 */
{ 2, 4, 1, 0, 3, 5, 6, 7 }, /* L M N P I G GM 4 */
/* No MV dominates */
{ 0, 4, 3, 1, 2, 5, 6, 7 }, /* N L P M I G GM 4 */
{ 0, 5, 4, 2, 3, 1, 6, 7 }, /* N G L P M I GM 4 */
};
/* Q_TABLE_SIZE thresholds that fall from 65 at the start of the table to 0 at
   the end.
   NOTE(review): presumably indexed by quantizer index and used as a motion
   vector decision threshold -- units and use are not visible here; confirm
   against the caller. */
ogg_uint32_t MvThreshTable[Q_TABLE_SIZE] = {
65, 65, 65, 65, 50, 50, 50, 50,
40, 40, 40, 40, 40, 40, 40, 40,
30, 30, 30, 30, 30, 30, 30, 30,
20, 20, 20, 20, 20, 20, 20, 20,
15, 15, 15, 15, 15, 15, 15, 15,
10, 10, 10, 10, 10, 10, 10, 10,
5, 5, 5, 5, 5, 5, 5, 5,
0, 0, 0, 0, 0, 0, 0, 0
};
/* Q_TABLE_SIZE factors that rise from 11 at the start of the table to 15 at
   the end.
   NOTE(review): presumably indexed by quantizer index; the scale of the
   factor (e.g. sixteenths) is not visible here -- confirm against the
   caller. */
ogg_uint32_t MVChangeFactorTable[Q_TABLE_SIZE] = {
11, 11, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15
};

View File

@ -0,0 +1,274 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: enquant.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include "encint.h"
/*Writes the quantization parameters in _qinfo to _opb with oggpackB_write().
The fields are emitted in this order: the loop filter limits, the AC scale
table, the DC scale table, the list of unique base matrices, and finally,
for each of the 6 (quantizer type, plane) combinations, either a short code
saying the set duplicates an earlier one or the explicit quant range sizes
and base matrix indices.
_opb: The pack buffer to append to.
_qinfo: The quantization parameters to write; they are only read.*/
void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){
const th_quant_ranges *qranges;
const th_quant_base *base_mats[2*3*64];
int indices[2][3][64];
int nbase_mats;
int nbits;
int ci;
int qi;
int qri;
int qti;
int pli;
int qtj;
int plj;
int bmi;
int i;
/*Loop filter limits: a 3-bit field width sized for the largest limit,
followed by all 64 limits at that width.*/
i=_qinfo->loop_filter_limits[0];
for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
nbits=OC_ILOG_32(i);
oggpackB_write(_opb,nbits,3);
for(qi=0;qi<64;qi++){
oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
}
/*580 bits for VP3.*/
/*AC scale table: the field width minus one is stored in 4 bits.
The running maximum starts at 1 so the argument passed to OC_ILOGNZ_32 is
never zero.*/
i=1;
for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i);
nbits=OC_ILOGNZ_32(i);
oggpackB_write(_opb,nbits-1,4);
for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
/*516 bits for VP3.*/
/*DC scale table: same layout as the AC scale table.*/
i=1;
for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i);
nbits=OC_ILOGNZ_32(i);
oggpackB_write(_opb,nbits-1,4);
for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
/*Consolidate any duplicate base matrices.*/
/*base_mats[] collects one pointer per distinct matrix (compared byte-wise),
and indices[qti][pli][qri] records which entry each range endpoint uses.*/
nbase_mats=0;
for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
qranges=_qinfo->qi_ranges[qti]+pli;
/*nranges ranges have nranges+1 endpoints, hence the <=.*/
for(qri=0;qri<=qranges->nranges;qri++){
for(bmi=0;;bmi++){
if(bmi>=nbase_mats){
/*No earlier matrix matched: append this one.*/
base_mats[bmi]=qranges->base_matrices+qri;
indices[qti][pli][qri]=nbase_mats++;
break;
}
else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
sizeof(base_mats[bmi][0]))==0){
indices[qti][pli][qri]=bmi;
break;
}
}
}
}
/*Write out the list of unique base matrices.
1545 bits for VP3 matrices.*/
oggpackB_write(_opb,nbase_mats-1,9);
for(bmi=0;bmi<nbase_mats;bmi++){
for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8);
}
/*Now store quant ranges and their associated indices into the base matrix
list.
46 bits for VP3 matrices.*/
nbits=OC_ILOG_32(nbase_mats-1);
/*i enumerates the (qti,pli) pairs in the order (0,0),(0,1),(0,2),(1,0),...*/
for(i=0;i<6;i++){
qti=i/3;
pli=i%3;
qranges=_qinfo->qi_ranges[qti]+pli;
if(i>0){
if(qti>0){
/*Identical to the same plane of the previous quantizer type: write the
value 1 in 2 bits and emit nothing else for this set.*/
if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
qranges->nranges*sizeof(qranges->sizes[0]))==0&&
memcmp(indices[qti][pli],indices[qti-1][pli],
(qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
oggpackB_write(_opb,1,2);
continue;
}
}
/*Identical to the immediately preceding set: write 0 in 1 bit for the
first quantizer type and in 2 bits for the second.*/
qtj=(i-1)/3;
plj=(i-1)%3;
if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
qranges->nranges*sizeof(qranges->sizes[0]))==0&&
memcmp(indices[qti][pli],indices[qtj][plj],
(qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
oggpackB_write(_opb,0,1+(qti>0));
continue;
}
/*Otherwise a single 1 bit announces an explicitly coded set.*/
oggpackB_write(_opb,1,1);
}
/*Explicit set: the first matrix index, then for each range its size minus
one (in just enough bits for the remaining span) and the next matrix index,
until the sizes cover all 63 steps.*/
oggpackB_write(_opb,indices[qti][pli][0],nbits);
for(qi=qri=0;qi<63;qri++){
oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi));
qi+=qranges->sizes[qri];
oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
}
}
}
/*Computes the multiplier/shift pair that lets the encoder divide by the
   quantizer _d without a division instruction.
  The divisor actually used is 2*_d.
  _this: Receives the shift count (l) and the multiplier minus 0x10000 (m).
  _d:    The dequantizer value.
         It must be non-zero, and is doubled in 16-bit storage, so values of
          0x8000 and above would be truncated.*/
static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){
  ogg_uint32_t recip;
  int          shift;
  _d=(ogg_uint16_t)(_d<<1);
  shift=OC_ILOGNZ_32(_d)-1;
  /*1+floor(2**(16+shift)/(2*d)): a 17-bit reciprocal whose top bit is
     dropped when it is stored in m.*/
  recip=((ogg_uint32_t)1<<(16+shift))/_d+1;
  _this->l=shift;
  _this->m=(ogg_int16_t)(recip-0x10000);
}
/*Fills in the dequantization tables and then derives the encoder-side
   quantization (reciprocal) tables from them.
  See the comments at oc_dequant_tables_init() for how the storage behind both
   sets of table pointers must be set up before calling this.
  _dequant: The dequantization table pointers, indexed [qi][pli][qti].
  _enquant: The matching quantization table pointers, same indexing.
            Where two dequantization entries share storage, the corresponding
             quantization entries are made to share storage as well.
  _qinfo:   The quantization parameters the tables are built from.*/
void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2],
 oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){
  int qi;
  /*Everything below is derived from the dequantization tables, so build
     those first.*/
  oc_dequant_tables_init(_dequant,NULL,_qinfo);
  for(qi=0;qi<64;qi++){
    int qti;
    for(qti=0;qti<2;qti++){
      int pli;
      for(pli=0;pli<3;pli++){
        const ogg_uint16_t *dq;
        oc_iquant          *eq;
        int                 nprev;
        int                 k;
        int                 zzi;
        dq=_dequant[qi][pli][qti];
        /*Scan the tables already handled for this qi, in the order they were
           visited (k/3 is the quantizer type, k%3 the plane), for one that
           shares its storage with the current table.*/
        nprev=qti*3+pli;
        for(k=0;k<nprev;k++){
          if(dq==_dequant[qi][k%3][k/3])break;
        }
        if(k<nprev){
          /*Duplicate: alias the quantization table too.*/
          _enquant[qi][pli][qti]=_enquant[qi][k%3][k/3];
          continue;
        }
        /*The VP3.2 "sharpness" parameter used to control the rounding offset
           and a dead zone around 0 here.
          Tokens are now R-D optimized after quantization, so the rounding
           offset is always 1/2 and no explicit dead zone is needed; what is
           left is exact integer arithmetic.*/
        eq=_enquant[qi][pli][qti];
        for(zzi=0;zzi<64;zzi++)oc_iquant_init(eq+zzi,dq[zzi]);
      }
    }
  }
}
/*This table gives the square root of the fraction of the squared magnitude of
each DCT coefficient relative to the total, scaled by 2**16, for both INTRA
and INTER modes.
These values were measured after motion-compensated prediction, before
quantization, over a large set of test video (from QCIF to 1080p) encoded at
all possible rates.
The DC coefficient takes into account the DPCM prediction (using the
quantized values from neighboring blocks, as the encoder does, but still
before quantization of the coefficient in the current block).
The results differ significantly from the expected variance (e.g., using an
AR(1) model of the signal with rho=0.95, as is frequently done to compute
the coding gain of the DCT).
We use them to estimate an "average" quantizer for a given quantizer matrix,
as this is used to parameterize a number of the rate control decisions.
These values are themselves probably quantizer-matrix dependent, since the
shape of the matrix affects the noise distribution in the reference frames,
but they should at least give us _some_ amount of adaptivity to different
matrices, as opposed to hard-coding a table of average Q values for the
current set.
The main features they capture are that a) only a few of the quantizers in
the upper-left corner contribute anything significant at all (though INTER
mode is significantly flatter) and b) the DPCM prediction of the DC
coefficient gives a very minor improvement in the INTRA case and a quite
significant one in the INTER case (over the expected variance).
The table is indexed as OC_RPSD[qti][ci]: oc_enquant_qavg_init() pairs entry
ci with the dequantizer stored at position OC_IZIG_ZAG[ci].
NOTE(review): the first sub-table is presumably INTRA and the second INTER
(the second is the flatter of the two) -- confirm against the definition of
qti.*/
static const ogg_uint16_t OC_RPSD[2][64]={
{
52725,17370,10399, 6867, 5115, 3798, 2942, 2076,
17370, 9900, 6948, 4994, 3836, 2869, 2229, 1619,
10399, 6948, 5516, 4202, 3376, 2573, 2015, 1461,
6867, 4994, 4202, 3377, 2800, 2164, 1718, 1243,
5115, 3836, 3376, 2800, 2391, 1884, 1530, 1091,
3798, 2869, 2573, 2164, 1884, 1495, 1212, 873,
2942, 2229, 2015, 1718, 1530, 1212, 1001, 704,
2076, 1619, 1461, 1243, 1091, 873, 704, 474
},
{
23411,15604,13529,11601,10683, 8958, 7840, 6142,
15604,11901,10718, 9108, 8290, 6961, 6023, 4487,
13529,10718, 9961, 8527, 7945, 6689, 5742, 4333,
11601, 9108, 8527, 7414, 7084, 5923, 5175, 3743,
10683, 8290, 7945, 7084, 6771, 5754, 4793, 3504,
8958, 6961, 6689, 5923, 5754, 4679, 3936, 2989,
7840, 6023, 5742, 5175, 4793, 3936, 3522, 2558,
6142, 4487, 4333, 3743, 3504, 2989, 2558, 1829
}
};
/*The fraction of the squared magnitude of the residuals in each color channel
relative to the total, scaled by 2**16, for each pixel format.
These values were measured after motion-compensated prediction, before
quantization, over a large set of test video encoded at all possible rates.
TODO: These values are only from INTER frames; it should be re-measured for
INTRA frames.
The table is indexed as OC_PCD[pixel format][plane], which is how
oc_enquant_qavg_init() reads it.
Rows 1 and 2 hold identical values.*/
static const ogg_uint16_t OC_PCD[4][3]={
{59926, 3038, 2572},
{55201, 5597, 4738},
{55201, 5597, 4738},
{47682, 9669, 8185}
};
/*Computes an "average" quantizer for every qi level, separately for each
   quantizer type (INTRA and INTER behave very differently), combining the
   three color planes.
  The average is a harmonic mean of the squared quantizers, weighted by the
   expected squared magnitude of each DCT coefficient (OC_RPSD) and of each
   color plane (OC_PCD).
  Under the (not quite true) assumption that DCT coefficients are
   Laplacian-distributed, this preserves the product Q*lambda, where
   lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not the
   lambda used for R-D optimization elsewhere in the code).
  The value Q*lambda completely determines the entropy of the coefficients.
  _log_qavg:  Receives the result for each [qti][qi].
              The stored value is (OC_Q57(48)-oc_blog64(q2))>>1, i.e. derived
               from qavg=1/sqrt(q2) in the log domain.
  _dequant:   The dequantization tables, indexed [qi][pli][qti].
              Every entry that is read must be non-zero.
  _pixel_fmt: The pixel format; selects the row of OC_PCD (0...3).*/
void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
 ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){
  int qti;
  for(qti=0;qti<2;qti++){
    int qi;
    for(qi=0;qi<64;qi++){
      ogg_int64_t wsum;
      int         pli;
      wsum=0;
      for(pli=0;pli<3;pli++){
        const ogg_uint16_t *dq;
        ogg_uint32_t        plane_sum;
        int                 ci;
        dq=_dequant[qi][pli][qti];
        plane_sum=0;
        for(ci=0;ci<64;ci++){
          unsigned qd;
          unsigned rq;
          qd=dq[OC_IZIG_ZAG[ci]];
          /*Expected coefficient magnitude over the quantizer, rounded to
             nearest, then squared.*/
          rq=(OC_RPSD[qti][ci]+(qd>>1))/qd;
          plane_sum+=rq*(ogg_uint32_t)rq;
        }
        wsum+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)plane_sum;
      }
      /*qavg=1.0/sqrt(q2).*/
      _log_qavg[qti][qi]=(OC_Q57(48)-oc_blog64(wsum))>>1;
    }
  }
}

View File

@ -0,0 +1,27 @@
#if !defined(_enquant_H)
# define _enquant_H (1)
# include "quant.h"
typedef struct oc_iquant oc_iquant;
#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1))
/*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0)
(i.e., one 16x16->16 mul, 2 shifts, and 2 adds).
This is not an approximation; for 16-bit x and d, it is exact.*/
struct oc_iquant{
/*The multiplier, stored with 0x10000 subtracted so that it fits in 16 bits;
the "+x" term in the formula above adds that part back.*/
ogg_int16_t m;
/*The final right shift.*/
ogg_int16_t l;
};
/*One reciprocal per coefficient of an 8x8 block.*/
typedef oc_iquant oc_iquant_table[64];
/*Writes the quantization parameters in _qinfo to the pack buffer _opb.*/
void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo);
/*Initializes the dequantization tables from _qinfo and derives the matching
encoder-side quantization tables from them.*/
void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2],
oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo);
/*Computes a log-domain "average" quantizer for each quantizer type and qi
level from the dequantization tables.*/
void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt);
#endif

View File

@ -0,0 +1,422 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: fdct.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include "encint.h"
#include "dct.h"
/*Performs a forward 8 point Type-II DCT transform.
The output is scaled by a factor of 2 from the orthonormal version of the
transform.
_y: The buffer to store the result in.
Data will be placed in the first 8 entries (e.g., in a row of an 8x8 block).
_x: The input coefficients.
Every 8th entry is used (e.g., from a column of an 8x8 block).
Note on reading the arithmetic below: in C, + binds tighter than >>, so an
expression such as a*b+c>>16 means (a*b+c)>>16, and x+y+z>>1 means
(x+y+z)>>1.*/
static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){
int t0;
int t1;
int t2;
int t3;
int t4;
int t5;
int t6;
int t7;
int r;
int s;
int u;
int v;
/*Stage 1:*/
/*The inputs are read with a stride of 8 (k<<3); the (int) casts make the
sums and differences happen at int width.*/
/*0-7 butterfly.*/
t0=_x[0<<3]+(int)_x[7<<3];
t7=_x[0<<3]-(int)_x[7<<3];
/*1-6 butterfly.*/
t1=_x[1<<3]+(int)_x[6<<3];
t6=_x[1<<3]-(int)_x[6<<3];
/*2-5 butterfly.*/
t2=_x[2<<3]+(int)_x[5<<3];
t5=_x[2<<3]-(int)_x[5<<3];
/*3-4 butterfly.*/
t3=_x[3<<3]+(int)_x[4<<3];
t4=_x[3<<3]-(int)_x[4<<3];
/*Stage 2:*/
/*0-3 butterfly.*/
r=t0+t3;
t3=t0-t3;
t0=r;
/*1-2 butterfly.*/
r=t1+t2;
t2=t1-t2;
t1=r;
/*6-5 butterfly.*/
r=t6+t5;
t5=t6-t5;
t6=r;
/*Stages 3 and 4 are where all the approximation occurs.
These are chosen to be as close to an exact inverse of the approximations
made in the iDCT as possible, while still using mostly 16-bit arithmetic.
We use some 16x16->32 signed MACs, but those still commonly execute in 1
cycle on a 16-bit DSP.
For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of
t5=(OC_C4S4*s>>16).
That is, applying the latter to the output of the former will recover t5
exactly (over the valid input range of t5, -23171...23169).
We increase the rounding bias to 0xB500 in this particular case so that
errors inverting the subsequent butterfly are not one-sided (e.g., the
mean error is very close to zero).
The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0.
The fDCT of an all-zeros block will still not be zero, because of the
biases we added at the very beginning of the process, but it will be close
enough that it is guaranteed to round to zero.*/
/*Stage 3:*/
/*4-5 butterfly.*/
s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1;
r=t4+s;
t5=t4-s;
t4=r;
/*7-6 butterfly.*/
s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1;
r=t7+s;
t6=t7-s;
t7=r;
/*Stage 4:*/
/*The results are stored in the order 0, 4, 2, 6, 5, 3, 1, 7.*/
/*0-1 butterfly.*/
r=(27146*t0+0x4000>>16)+t0+(t0!=0);
s=(27146*t1+0xB500>>16)+t1+(t1!=0);
u=r+s>>1;
v=r-u;
_y[0]=u;
_y[4]=v;
/*3-2 rotation by 6pi/16*/
u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0);
s=(OC_C6S2*u>>16)-t2;
v=(s*21600+0x2800>>18)+s+(s!=0);
_y[2]=u;
_y[6]=v;
/*6-5 rotation by 3pi/16*/
u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0);
s=t6-(OC_C5S3*u>>16);
v=(s*26568+0x3400>>17)+s+(s!=0);
_y[5]=u;
_y[3]=v;
/*7-4 rotation by 7pi/16*/
u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0);
s=(OC_C7S1*u>>16)-t4;
v=(s*20539+0x3000>>20)+s+(s!=0);
_y[1]=u;
_y[7]=v;
}
/*Runs the forward 8x8 DCT through the implementation installed in the
   encoder's optimization vtable.
  _enc: The encoder context whose opt_vtable.fdct8x8 entry is called.
  _y:   The buffer to store the result in.
  _x:   The input coefficients.*/
void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
 const ogg_int16_t _x[64]){
  _enc->opt_vtable.fdct8x8(_y,_x);
}
/*Portable C implementation of the forward 8x8 Type-II DCT.
  The result is scaled by a factor of 4 relative to the orthonormal transform.
  _y: The buffer to store the result in.
      It may alias _x: the whole input is copied to a scratch block before
       anything is written to _y.
  _x: The input coefficients.*/
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
  ogg_int16_t w[64];
  int         k;
  /*Work with two extra bits of precision to improve accuracy; any more and
     we could overflow.*/
  for(k=0;k<64;k++)w[k]=_x[k]<<2;
  /*Small biases that cancel a systematic error left over in the full
     fDCT->iDCT round trip.*/
  w[0]+=w[0]!=0?2:1;
  w[1]++;
  w[8]--;
  /*First pass: each column of w becomes a row of _y.*/
  for(k=0;k<8;k++)oc_fdct8(_y+(k<<3),w+k);
  /*Second pass: each column of _y becomes a row of w.*/
  for(k=0;k<8;k++)oc_fdct8(w+(k<<3),_y+k);
  /*Round back down to the external working precision (still scaled by four
     relative to the orthogonal result).
    TODO: We should just update the external working precision.*/
  for(k=0;k<64;k++)_y[k]=(w[k]+2)>>2;
}
/*This does not seem to outperform simple LFE border padding before MC.
It yields higher PSNR, but much higher bitrate usage.*/
#if 0
typedef struct oc_extension_info oc_extension_info;
/*Information needed to pad boundary blocks.
We multiply each row/column by an extension matrix that fills in the padding
values as a linear combination of the active values, so that an equivalent
number of coefficients are forced to zero.
This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as
little as 7 multiplies.
We compute the extension matrices for every possible shape in advance, as
there are only 35.
The coefficients for all matrices are stored in a single array to take
advantage of the overlap and repetitiveness of many of the shapes.
A similar technique is applied to the offsets into this array.
This reduces the required table storage by about 48%.
See tools/extgen.c for details.
We could conceivably do the same for all 256 possible shapes.*/
struct oc_extension_info{
/*The mask of the active pixels in the shape.*/
short mask;
/*The number of active pixels in the shape.*/
short na;
/*The extension matrix.
This is (8-na) rows of na coefficients each: ext[z][a] is the weight of
active pixel a in padding pixel z.*/
const ogg_int16_t *const *ext;
/*The pixel indices: na active pixels followed by 8-na padding pixels.*/
unsigned char pi[8];
/*The coefficient indices: na unconstrained coefficients followed by 8-na
coefficients to be forced to zero.*/
unsigned char ci[8];
};
/*The number of shapes we need.*/
#define OC_NSHAPES (35)
/*The shared pool of extension matrix coefficients; OC_EXT_ROWS[] points into
this array, and rows of different matrices may overlap.
Literals of 0x8000 and above do not fit in a signed 16-bit value and rely on
the conversion to ogg_int16_t wrapping to a negative number.
NOTE(review): the scaling looks like Q15 fixed point (oc_fdct8_ext() shifts
the doubled product right by 16) -- confirm against tools/extgen.c.*/
static const ogg_int16_t OC_EXT_COEFFS[229]={
0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF,
0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000,
0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000,
0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345,
0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59,
0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59,
0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225,
0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C,
0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882,
0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768,
0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185,
0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59,
0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC,
0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C,
0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7,
0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C,
0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF,
0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C,
0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D,
0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04,
0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546,
0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587,
0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4,
0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC,
0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB,
0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08,
0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12,
0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A,
0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC
};
/*Row pointers for the extension matrices: each entry is the start of one
matrix row inside OC_EXT_COEFFS[].
The ext member of each OC_EXTENSION_INFO[] entry points at the first row of
its matrix in this array; the following 8-na entries are its rows.*/
static const ogg_int16_t *const OC_EXT_ROWS[96]={
OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,
OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 6,
OC_EXT_COEFFS+ 27,OC_EXT_COEFFS+ 38,OC_EXT_COEFFS+ 43,OC_EXT_COEFFS+ 32,
OC_EXT_COEFFS+ 49,OC_EXT_COEFFS+ 58,OC_EXT_COEFFS+ 67,OC_EXT_COEFFS+ 71,
OC_EXT_COEFFS+ 62,OC_EXT_COEFFS+ 53,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,
OC_EXT_COEFFS+ 14,OC_EXT_COEFFS+ 13,OC_EXT_COEFFS+ 76,OC_EXT_COEFFS+ 81,
OC_EXT_COEFFS+ 86,OC_EXT_COEFFS+ 91,OC_EXT_COEFFS+ 96,OC_EXT_COEFFS+ 98,
OC_EXT_COEFFS+ 93,OC_EXT_COEFFS+ 88,OC_EXT_COEFFS+ 83,OC_EXT_COEFFS+ 78,
OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,
OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,
OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108,
OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+ 16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141,
OC_EXT_COEFFS+ 20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116,
OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167,
OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149,
OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177,
OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197,
OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215,
OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+ 24,OC_EXT_COEFFS+ 204,
OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199,
OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,
OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 134,
OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223,
OC_EXT_COEFFS+ 226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221
};
/*One entry per supported shape, looked up by comparing the mask field.
The fields of each entry are, in order: the active pixel mask, the number of
active pixels (the number of bits set in the mask), the first row of the
extension matrix in OC_EXT_ROWS[], the pixel index list, and the coefficient
index list.*/
static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={
{0x7F,7,OC_EXT_ROWS+ 0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}},
{0xFE,7,OC_EXT_ROWS+ 7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}},
{0x3F,6,OC_EXT_ROWS+ 8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}},
{0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}},
{0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}},
{0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}},
{0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}},
{0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}},
{0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}},
{0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}},
{0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}},
{0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}},
{0x01,1,OC_EXT_ROWS+ 0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}},
{0x80,1,OC_EXT_ROWS+ 0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
{0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}},
{0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}},
{0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}},
{0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}},
{0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}},
{0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}},
{0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}},
{0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}},
{0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}},
{0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}},
{0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}},
{0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}},
{0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}},
{0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}},
{0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}},
{0x40,1,OC_EXT_ROWS+ 0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
{0x20,1,OC_EXT_ROWS+ 0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
{0x10,1,OC_EXT_ROWS+ 0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}},
{0x08,1,OC_EXT_ROWS+ 0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}},
{0x04,1,OC_EXT_ROWS+ 0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}},
{0x02,1,OC_EXT_ROWS+ 0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}}
};
/*Pads a single column of a partial block and then performs a forward Type-II
   DCT on the result.
  The input is scaled by a factor of 4 and biased appropriately for the current
   fDCT implementation.
  The output is scaled by an additional factor of 2 from the orthonormal
   version of the transform.
  _y: The buffer to store the result in.
      Data will be placed in the first 8 entries (e.g., in a row of an 8x8
       block).
  _x: The input coefficients.
      Every 8th entry is used (e.g., from a column of an 8x8 block).
      In the general case the padding entries of this column are overwritten
       with the computed padding values.
  _e: The extension information for the shape.*/
static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x,
 const oc_extension_info *_e){
  const unsigned char *pi;
  int                  na;
  na=_e->na;
  pi=_e->pi;
  if(na==1){
    int ci;
    /*While the branch below is still correct for shapes with na==1, we can
       perform the entire transform with just 1 multiply in this case instead
       of 23.
      The single active sample lives at stride 8 like every other entry of
       the column, so it must be read from _x[pi[0]<<3]; indexing with the
       bare pi[0] would pick up a sample from a neighboring column.*/
    _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]<<3])));
    for(ci=1;ci<8;ci++)_y[ci]=0;
  }
  else{
    const ogg_int16_t *const *ext;
    int                       zpi;
    int                       api;
    int                       nz;
    /*First multiply by the extension matrix to compute the padding values.*/
    nz=8-na;
    ext=_e->ext;
    for(zpi=0;zpi<nz;zpi++){
      ogg_int32_t v;
      v=0;
      /*The active samples are doubled before the multiply; the result is
         rounded twice on the way back down (>>16, then +1>>1).*/
      for(api=0;api<na;api++){
        v+=ext[zpi][api]*(ogg_int32_t)(_x[pi[api]<<3]<<1);
      }
      _x[pi[na+zpi]<<3]=(ogg_int16_t)(v+0x8000>>16)+1>>1;
    }
    /*The column is now fully populated, so the regular transform applies.*/
    oc_fdct8(_y,_x);
  }
}
/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the
border of the picture region.
Columns and rows that are only partially inside the border are padded by
oc_fdct8_ext() using the matching OC_EXTENSION_INFO entry before being
transformed.
If no entry matches the shape of the mask, the plain oc_enc_fdct8x8_c() is
used on the unpadded input instead.
This method ONLY works with rectangular regions.
_border: A description of which pixels are inside the border.
Its 64-bit mask is consumed one byte per loop iteration below.
_y: The buffer to store the result in.
This may be the same as _x.
_x: The input pixel values.
Pixel values outside the border will be ignored.*/
void oc_fdct8x8_border(const oc_border_info *_border,
ogg_int16_t _y[64],const ogg_int16_t _x[64]){
ogg_int16_t *in;
ogg_int16_t *out;
/*Scratch block: first the up-scaled copy of the input, later the output of
the row pass.*/
ogg_int16_t w[64];
ogg_int64_t mask;
const oc_extension_info *cext;
const oc_extension_info *rext;
int cmask;
int rmask;
int ri;
int ci;
/*Identify the shapes of the non-zero rows and columns.*/
rmask=cmask=0;
mask=_border->mask;
for(ri=0;ri<8;ri++){
/*This aggregation is _only_ correct for rectangular masks.
Bit ri of cmask records whether byte ri of the mask has any bit set, while
rmask accumulates the union of all eight bytes.*/
cmask|=((mask&0xFF)!=0)<<ri;
rmask|=mask&0xFF;
mask>>=8;
}
/*Find the associated extension info for these shapes.
A full mask (0xFF) needs no padding, which is signalled by a NULL pointer.*/
if(cmask==0xFF)cext=NULL;
else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){
/*If we somehow can't find the shape, then just do an unpadded fDCT.
It won't be efficient, but it should still be correct.*/
if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){
oc_enc_fdct8x8_c(_y,_x);
return;
}
}
if(rmask==0xFF)rext=NULL;
else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){
/*If we somehow can't find the shape, then just do an unpadded fDCT.
It won't be efficient, but it should still be correct.*/
if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){
oc_enc_fdct8x8_c(_y,_x);
return;
}
}
/*Add two extra bits of working precision to improve accuracy; any more and
we could overflow.*/
for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2;
/*These biases correct for some systematic error that remains in the full
fDCT->iDCT round trip.
We can safely add them before padding, since if these pixel values are
overwritten, we didn't care what they were anyway (and the unbiased values
will usually yield smaller DCT coefficient magnitudes).*/
w[0]+=(w[0]!=0)+1;
w[1]++;
w[8]--;
/*Transform the columns.
We can ignore zero columns without a problem.
Column ci of w is written to the 8 consecutive entries of _y starting at
out+(ci<<3), so the intermediate result is stored transposed.*/
in=w;
out=_y;
if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci);
else for(ci=0;ci<8;ci++)if(rmask&(1<<ci))oc_fdct8_ext(out+(ci<<3),in+ci,cext);
/*Transform the rows.
We transform even rows that are supposedly zero, because rounding errors
may make them slightly non-zero, and this will give a more precise
reconstruction with very small quantizers.
This pass reads from _y and writes back into the scratch block w.*/
in=_y;
out=w;
if(rext==NULL)for(ri=0;ri<8;ri++)oc_fdct8(out+(ri<<3),in+ri);
else for(ri=0;ri<8;ri++)oc_fdct8_ext(out+(ri<<3),in+ri,rext);
/*Round the result back to the external working precision (which is still
scaled by four relative to the orthogonal result).
TODO: We should just update the external working precision.*/
for(ci=0;ci<64;ci++)_y[ci]=w[ci]+2>>2;
}
#endif

View File

@ -0,0 +1,87 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <string.h>
#include "internal.h"
/*Forwards a fragment copy to whichever implementation is installed in
   _state->opt_vtable.frag_copy.
  _state:   The state whose function table selects the implementation.
  _dst:     The destination of the copy.
  _src:     The source of the copy.
  _ystride: The stride passed through to the implementation.*/
void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
 const unsigned char *_src,int _ystride){
  _state->opt_vtable.frag_copy(_dst,_src,_ystride);
}
/*Portable C fragment copy.
  Copies 8 rows of 8 bytes each from _src to _dst; consecutive rows are
   _ystride bytes apart in both buffers.*/
void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){
  int row;
  for(row=0;row<8;row++){
    int off;
    /*Byte offset of the start of this row in both buffers.*/
    off=row*_ystride;
    memcpy(_dst+off,_src+off,8*sizeof(*_dst));
  }
}
/*Forwards an intra fragment reconstruction to the implementation installed in
   _state->opt_vtable.frag_recon_intra.
  _state:   The state whose function table selects the implementation.
  _dst:     The destination fragment.
  _ystride: The stride passed through to the implementation.
  _residue: The 64 residual values passed through to the implementation.*/
void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
 int _ystride,const ogg_int16_t _residue[64]){
  (*_state->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue);
}
/*Portable C intra reconstruction.
  Writes an 8x8 block to _dst where every pixel is the matching residual plus
   128, passed through OC_CLAMP255.
  _dst:     The destination; rows are _ystride bytes apart.
  _ystride: The distance between destination rows.
  _residue: 64 residual values, 8 per row.*/
void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride,
 const ogg_int16_t _residue[64]){
  int y;
  for(y=0;y<8;y++){
    const ogg_int16_t *res_row;
    int                x;
    res_row=_residue+y*8;
    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res_row[x]+128);
    _dst+=_ystride;
  }
}
/*Forwards an inter fragment reconstruction (one predictor) to the
   implementation installed in _state->opt_vtable.frag_recon_inter.
  _state:   The state whose function table selects the implementation.
  _dst:     The destination fragment.
  _src:     The predictor fragment.
  _ystride: The stride passed through to the implementation.
  _residue: The 64 residual values passed through to the implementation.*/
void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
  (*_state->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue);
}
/*Portable C inter reconstruction with a single predictor.
  Writes an 8x8 block to _dst where every pixel is the matching predictor
   pixel plus the matching residual, passed through OC_CLAMP255.
  _dst:     The destination; rows are _ystride bytes apart.
  _src:     The predictor; rows are _ystride bytes apart.
  _ystride: The distance between rows in both buffers.
  _residue: 64 residual values, 8 per row.*/
void oc_frag_recon_inter_c(unsigned char *_dst,
 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
  int y;
  for(y=0;y<8;y++){
    const ogg_int16_t *res_row;
    int                x;
    res_row=_residue+y*8;
    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res_row[x]+_src[x]);
    _dst+=_ystride;
    _src+=_ystride;
  }
}
/*Forwards an inter fragment reconstruction with two predictors to the
   implementation installed in _state->opt_vtable.frag_recon_inter2.
  _state:   The state whose function table selects the implementation.
  _dst:     The destination fragment.
  _src1:    The first predictor fragment.
  _src2:    The second predictor fragment.
  _ystride: The stride passed through to the implementation.
  _residue: The 64 residual values passed through to the implementation.*/
void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
 const unsigned char *_src1,const unsigned char *_src2,int _ystride,
 const ogg_int16_t _residue[64]){
  (*_state->opt_vtable.frag_recon_inter2)(_dst,_src1,_src2,_ystride,_residue);
}
/*Portable C inter reconstruction with two predictors.
  Every output pixel is the residual plus the truncated average of the two
   predictor pixels ((a+b)>>1), passed through OC_CLAMP255.
  _dst:     The destination; rows are _ystride bytes apart.
  _src1:    The first predictor; rows are _ystride bytes apart.
  _src2:    The second predictor; rows are _ystride bytes apart.
  _ystride: The distance between rows in all three buffers.
  _residue: 64 residual values, 8 per row.*/
void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
 const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){
  int y;
  for(y=0;y<8;y++){
    const ogg_int16_t *res_row;
    int                x;
    res_row=_residue+y*8;
    for(x=0;x<8;x++){
      _dst[x]=OC_CLAMP255(res_row[x]+((_src1[x]+_src2[x])>>1));
    }
    _dst+=_ystride;
    _src1+=_ystride;
    _src2+=_ystride;
  }
}
/*Invokes the restore_fpu routine installed in _state->opt_vtable.
  _state: The state whose function table selects the implementation.*/
void oc_restore_fpu(const oc_theora_state *_state){
  (*_state->opt_vtable.restore_fpu)();
}
/*Portable C version of restore_fpu: intentionally does nothing.*/
void oc_restore_fpu_c(void){}

View File

@ -1,614 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: frarray.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <string.h>
#include "encoder_internal.h"
#include "block_inline.h"
/* Writes one super-block run length to cpi->oggbuffer using the variable
   length code below and returns the number of bits written.

   Coding scheme:
      Codeword            RunLength
    0                       1
    10x                    2-3
    110x                   4-5
    1110xx                 6-9
    11110xxx              10-17
    111110xxxx            18-33
    111111xxxxxxxxxxxx    34-4129

   NOTE(review): a run longer than 4129 does not fit the 12-bit suffix;
   presumably callers never produce one - confirm. */
static ogg_uint32_t FrArrayCodeSBRun( CP_INSTANCE *cpi, ogg_uint32_t value ){
  ogg_uint32_t prefix;    /* Codeword with an all-zero suffix. */
  ogg_uint32_t first_run; /* Shortest run covered by this codeword class. */
  ogg_uint32_t nbits;     /* Total codeword length in bits. */

  if ( value == 1 ) {
    prefix = 0;       first_run = 1;  nbits = 1;
  } else if ( value <= 3 ) {
    prefix = 0x0004;  first_run = 2;  nbits = 3;
  } else if ( value <= 5 ) {
    prefix = 0x000C;  first_run = 4;  nbits = 4;
  } else if ( value <= 9 ) {
    prefix = 0x0038;  first_run = 6;  nbits = 6;
  } else if ( value <= 17 ) {
    prefix = 0x00F0;  first_run = 10; nbits = 8;
  } else if ( value <= 33 ) {
    prefix = 0x03E0;  first_run = 18; nbits = 10;
  } else {
    prefix = 0x3F000; first_run = 34; nbits = 18;
  }

  /* Add the bits to the encode holding buffer. */
  oggpackB_write( cpi->oggbuffer, prefix + (value - first_run), nbits );

  return nbits;
}
/* Writes one block run length to cpi->oggbuffer using the variable length
   code below and returns the number of bits written.

   Coding scheme:
      Codeword      RunLength
    0x                1-2
    10x               3-4
    110x              5-6
    1110xx            7-10
    11110xx          11-14
    11111xxxx        15-30 */
static ogg_uint32_t FrArrayCodeBlockRun( CP_INSTANCE *cpi,
                                         ogg_uint32_t value ) {
  ogg_uint32_t prefix;    /* Codeword with an all-zero suffix. */
  ogg_uint32_t first_run; /* Shortest run covered by this codeword class. */
  ogg_uint32_t nbits;     /* Total codeword length in bits. */

  if ( value <= 2 ) {
    prefix = 0;      first_run = 1;  nbits = 2;
  } else if ( value <= 4 ) {
    prefix = 0x0004; first_run = 3;  nbits = 3;
  } else if ( value <= 6 ) {
    prefix = 0x000C; first_run = 5;  nbits = 4;
  } else if ( value <= 10 ) {
    prefix = 0x0038; first_run = 7;  nbits = 6;
  } else if ( value <= 14 ) {
    prefix = 0x0078; first_run = 11; nbits = 7;
  } else {
    prefix = 0x01F0; first_run = 15; nbits = 9;
  }

  /* Add the bits to the encode holding buffer. */
  oggpackB_write( cpi->oggbuffer, prefix + (value - first_run), nbits );

  return nbits;
}
/* Run-length codes the coded / not-coded state of the frame's blocks and
   writes it to cpi->oggbuffer.

   Three lists are emitted, in this order:
     1. the "partially coded" flag of every super-block,
     2. the "fully coded" flag of every super-block that is not partially
        coded,
     3. the coded flag of every block that lies inside a partially coded
        super-block (only if there is at least one such block).

   cpi: The encoder instance. cpi->pb.display_fragments supplies the
        per-block coded flags; the SBFullyFlags, SBCodedFlags,
        PartiallyCodedFlags and BlockCodedFlags workspaces are overwritten. */
void PackAndWriteDFArray( CP_INSTANCE *cpi ){
  ogg_uint32_t  i;
  unsigned char val;
  ogg_uint32_t  run_count;

  ogg_uint32_t  SB, MB, B;   /* Block, MB and SB loop variables */
  ogg_uint32_t  BListIndex = 0;
  ogg_uint32_t  LastSbBIndex = 0;
  ogg_int32_t   DfBlockIndex;  /* Block index in display_fragments */

  /* Initialise workspaces */
  memset( cpi->pb.SBFullyFlags, 1, cpi->pb.SuperBlocks);
  memset( cpi->pb.SBCodedFlags, 0, cpi->pb.SuperBlocks );
  memset( cpi->PartiallyCodedFlags, 0, cpi->pb.SuperBlocks );
  memset( cpi->BlockCodedFlags, 0, cpi->pb.UnitFragments);

  for( SB = 0; SB < cpi->pb.SuperBlocks; SB++ ) {
    /* Check for coded blocks and macro-blocks */
    for ( MB=0; MB<4; MB++ ) {
      /* If MB in frame */
      if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) >= 0 ) {
        for ( B=0; B<4; B++ ) {
          DfBlockIndex = QuadMapToIndex1( cpi->pb.BlockMap,SB, MB, B );

          /* Does Block lie in frame: */
          if ( DfBlockIndex >= 0 ) {
            /* In Frame: If it is not coded then this SB is only
               partly coded.: */
            if ( cpi->pb.display_fragments[DfBlockIndex] ) {
              cpi->pb.SBCodedFlags[SB] = 1; /* SB at least partly coded */
              cpi->BlockCodedFlags[BListIndex] = 1; /* Block is coded */
            }else{
              cpi->pb.SBFullyFlags[SB] = 0; /* SB not fully coded */
              cpi->BlockCodedFlags[BListIndex] = 0; /* Block is not coded */
            }

            BListIndex++;
          }
        }
      }
    }

    /* Is the SB fully coded or uncoded.
       If so then rewind BListIndex, discarding the block flags just
       recorded for this SB: only partially coded SBs contribute to the
       block list. */
    if ( cpi->pb.SBFullyFlags[SB] || !cpi->pb.SBCodedFlags[SB] ) {
      BListIndex = LastSbBIndex; /* Reset to values from previous SB */
    }else{
      cpi->PartiallyCodedFlags[SB] = 1; /* Set up list of partially
                                           coded SBs */
      LastSbBIndex = BListIndex;
    }
  }

  /* Code list of partially coded Super-Block. */
  val = cpi->PartiallyCodedFlags[0];
  oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);

  i = 0;
  while ( i < cpi->pb.SuperBlocks ) {
    run_count = 0;
    while ( (i<cpi->pb.SuperBlocks) && (cpi->PartiallyCodedFlags[i]==val) ) {
      i++;
      run_count++;
    }

    /* Code the run */
    FrArrayCodeSBRun( cpi, run_count );
    val = ( val == 0 ) ? 1 : 0;
  }

  /* RLC Super-Block fully/not coded. */
  i = 0;

  /* Skip partially coded blocks */
  while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] )
    i++;

  if ( i < cpi->pb.SuperBlocks ) {
    val = cpi->pb.SBFullyFlags[i];
    oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);

    while ( i < cpi->pb.SuperBlocks ) {
      run_count = 0;
      while ( (i < cpi->pb.SuperBlocks) && (cpi->pb.SBFullyFlags[i] == val) ) {
        i++;
        /* Skip partially coded blocks */
        while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] )
          i++;
        run_count++;
      }

      /* Code the run */
      FrArrayCodeSBRun( cpi, run_count );
      val = ( val == 0 ) ? 1 : 0;
    }
  }

  /* Now code the block flags */
  if ( BListIndex > 0 ) {
    /* Code the block flags start value */
    val = cpi->BlockCodedFlags[0];
    oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);

    /* Now code the block flags. */
    for ( i = 0; i < BListIndex; ) {
      run_count = 0;
      /* Check the bound before touching the array so that the element one
         past the end of the list is never read. */
      while ( (i < BListIndex) && (cpi->BlockCodedFlags[i] == val) ) {
        i++;
        run_count++;
      }

      FrArrayCodeBlockRun( cpi, run_count );
      val = ( val == 0 ) ? 1 : 0;
    }
  }
}
/* Resets the run-length decoder state held in pbi so that the next bit fed
   to FrArrayDeCodeBlockRun()/FrArrayDeCodeSBRun() starts a new codeword. */
static void FrArrayDeCodeInit(PB_INSTANCE *pbi){
  pbi->bits_so_far = 0;
  pbi->bit_pattern = 0;
}
/* Feeds one more bit to the block run-length decoder.

   Returns 1 and stores the decoded run length in *run_value once the bits
   accumulated in pbi->bit_pattern form a complete codeword; returns 0 (and
   leaves *run_value untouched) while more bits are needed.

   Coding scheme:
      Codeword      RunLength
    0x                1-2
    10x               3-4
    110x              5-6
    1110xx            7-10
    11110xx          11-14
    11111xxxx        15-30 */
static int FrArrayDeCodeBlockRun( PB_INSTANCE *pbi, ogg_uint32_t bit_value,
                                  ogg_int32_t * run_value ){
  int stop_bit;   /* Bit that must be clear for a codeword of this length. */
  int value_mask; /* Low bits that carry the offset within the class. */
  int first_run;  /* Shortest run representable at this length. */

  /* Shift the new bit into the pattern accumulated so far. */
  pbi->bit_pattern = (pbi->bit_pattern << 1) + (bit_value & 1);
  pbi->bits_so_far++;

  /* Only some lengths can terminate a codeword. */
  switch ( pbi->bits_so_far ){
  case 2: stop_bit = 0x0002; value_mask = 0x0001; first_run = 1;  break;
  case 3: stop_bit = 0x0002; value_mask = 0x0001; first_run = 3;  break;
  case 4: stop_bit = 0x0002; value_mask = 0x0001; first_run = 5;  break;
  case 6: stop_bit = 0x0004; value_mask = 0x0003; first_run = 7;  break;
  case 7: stop_bit = 0x0004; value_mask = 0x0003; first_run = 11; break;
  /* The longest codeword has no terminating zero bit. */
  case 9: stop_bit = 0;      value_mask = 0x000F; first_run = 15; break;
  default: return 0;
  }

  if ( pbi->bit_pattern & stop_bit )
    return 0;

  *run_value = (pbi->bit_pattern & value_mask) + first_run;
  return 1;
}
/* Feeds one more bit to the super-block run-length decoder.

   Returns 1 and stores the decoded run length in *run_value once the bits
   accumulated in pbi->bit_pattern form a complete codeword; returns 0 (and
   leaves *run_value untouched) while more bits are needed.

   Coding scheme:
      Codeword            RunLength
    0                       1
    10x                    2-3
    110x                   4-5
    1110xx                 6-9
    11110xxx              10-17
    111110xxxx            18-33
    111111xxxxxxxxxxxx    34-4129 */
static int FrArrayDeCodeSBRun (PB_INSTANCE *pbi, ogg_uint32_t bit_value,
                               ogg_int32_t * run_value ){
  int stop_bit;   /* Bit that must be clear for a codeword of this length. */
  int value_mask; /* Low bits that carry the offset within the class. */
  int first_run;  /* Shortest run representable at this length. */

  /* Shift the new bit into the pattern accumulated so far. */
  pbi->bit_pattern = (pbi->bit_pattern << 1) + (bit_value & 1);
  pbi->bits_so_far++;

  /* Only some lengths can terminate a codeword. */
  switch ( pbi->bits_so_far ){
  case 1:
    /* A lone zero bit is the complete codeword for a run of 1. */
    if ( pbi->bit_pattern != 0 )
      return 0;
    *run_value = 1;
    return 1;
  case 3:  stop_bit = 0x0002; value_mask = 0x0001; first_run = 2;  break;
  case 4:  stop_bit = 0x0002; value_mask = 0x0001; first_run = 4;  break;
  case 6:  stop_bit = 0x0004; value_mask = 0x0003; first_run = 6;  break;
  case 8:  stop_bit = 0x0008; value_mask = 0x0007; first_run = 10; break;
  case 10: stop_bit = 0x0010; value_mask = 0x000F; first_run = 18; break;
  /* The longest codeword has no terminating zero bit. */
  case 18: stop_bit = 0;      value_mask = 0x0FFF; first_run = 34; break;
  default: return 0;
  }

  if ( pbi->bit_pattern & stop_bit )
    return 0;

  *run_value = (pbi->bit_pattern & value_mask) + first_run;
  return 1;
}
/* Starts decoding the run-length coded block flag list: reads the value of
   the first run into pbi->NextBit and its length into pbi->BitsLeft. */
static void GetNextBInit(PB_INSTANCE *pbi){
  long bit;

  /* The first bit is the flag value shared by the whole first run. */
  theora_read(pbi->opb,1,&bit);
  pbi->NextBit = (unsigned char)bit;

  /* Feed bits to the block-run decoder until a codeword is complete. */
  FrArrayDeCodeInit(pbi);
  for(;;){
    theora_read(pbi->opb,1,&bit);
    if ( FrArrayDeCodeBlockRun(pbi,bit,&pbi->BitsLeft) != 0 )
      break;
  }
}
/* Returns the next flag from the run-length coded block flag list.
   When the current run is used up, the flag value is toggled and the length
   of the following run is read from the bitstream first. */
static unsigned char GetNextBBit (PB_INSTANCE *pbi){
  if ( pbi->BitsLeft == 0 ){
    long bit;

    /* Consecutive runs always alternate between 0 and 1. */
    pbi->NextBit = (unsigned char)( pbi->NextBit != 1 );

    /* Read the length of the next run. */
    FrArrayDeCodeInit(pbi);
    for(;;){
      theora_read(pbi->opb,1,&bit);
      if ( FrArrayDeCodeBlockRun(pbi,bit,&pbi->BitsLeft) != 0 )
        break;
    }
  }

  /* One flag of the current run has now been consumed. */
  pbi->BitsLeft--;

  return pbi->NextBit;
}
/* Starts decoding a run-length coded super-block flag list: reads the value
   of the first run into pbi->NextBit and its length into pbi->BitsLeft. */
static void GetNextSbInit(PB_INSTANCE *pbi){
  long bit;

  /* The first bit is the flag value shared by the whole first run. */
  theora_read(pbi->opb,1,&bit);
  pbi->NextBit = (unsigned char)bit;

  /* Feed bits to the super-block-run decoder until a codeword is complete. */
  FrArrayDeCodeInit(pbi);
  for(;;){
    theora_read(pbi->opb,1,&bit);
    if ( FrArrayDeCodeSBRun(pbi,bit,&pbi->BitsLeft) != 0 )
      break;
  }
}
/* Returns the next flag from a run-length coded super-block flag list.
   When the current run is used up, the flag value is toggled and the length
   of the following run is read from the bitstream first. */
static unsigned char GetNextSbBit (PB_INSTANCE *pbi){
  if ( pbi->BitsLeft == 0 ){
    long bit;

    /* Consecutive runs always alternate between 0 and 1. */
    pbi->NextBit = (unsigned char)( pbi->NextBit != 1 );

    /* Read the length of the next run. */
    FrArrayDeCodeInit(pbi);
    for(;;){
      theora_read(pbi->opb,1,&bit);
      if ( FrArrayDeCodeSBRun(pbi,bit,&pbi->BitsLeft) != 0 )
        break;
    }
  }

  /* One flag of the current run has now been consumed. */
  pbi->BitsLeft--;

  return pbi->NextBit;
}
/* Decodes which blocks of the current frame are coded.
On return pbi->display_fragments holds one flag per block,
pbi->CodedBlockList lists the indices of the coded blocks in decode order
(pbi->CodedBlockIndex entries) and pbi->MBCodedFlags marks every in-frame
macro-block that contains at least one coded block.
For key frames (BASE_FRAME) every super-block is treated as fully coded and
no flag bits are read here; for other frames three run-length coded lists
are read in order: partially coded super-blocks, fully coded super-blocks
and the per-block flags of the partially coded super-blocks. */
void QuadDecodeDisplayFragments ( PB_INSTANCE *pbi ){
ogg_uint32_t SB, MB, B;
int DataToDecode;
ogg_int32_t dfIndex;
/* Counts in-frame macro-blocks in the order they are visited below. */
ogg_uint32_t MBIndex = 0;
/* Reset various data structures common to key frames and inter frames. */
pbi->CodedBlockIndex = 0;
memset ( pbi->display_fragments, 0, pbi->UnitFragments );
/* For "Key frames" mark all blocks as coded and return. */
/* Else initialise the ArrayPtr array to 0 (all blocks uncoded by default) */
if ( GetFrameType(pbi) == BASE_FRAME ) {
memset( pbi->SBFullyFlags, 1, pbi->SuperBlocks );
memset( pbi->SBCodedFlags, 1, pbi->SuperBlocks );
memset( pbi->MBCodedFlags, 0, pbi->MacroBlocks );
}else{
memset( pbi->SBFullyFlags, 0, pbi->SuperBlocks );
memset( pbi->MBCodedFlags, 0, pbi->MacroBlocks );
/* Un-pack the list of partially coded Super-Blocks */
/* At this point SBCodedFlags[SB] means "partially coded"; fully coded
super-blocks are merged in further down. */
GetNextSbInit(pbi);
for( SB = 0; SB < pbi->SuperBlocks; SB++){
pbi->SBCodedFlags[SB] = GetNextSbBit (pbi);
}
/* Scan through the list of super blocks. Unless all are marked
as partially coded we have more to do. */
DataToDecode = 0;
for ( SB=0; SB<pbi->SuperBlocks; SB++ ) {
if ( !pbi->SBCodedFlags[SB] ) {
DataToDecode = 1;
break;
}
}
/* Are there further block map bits to decode ? */
if ( DataToDecode ) {
/* Un-pack the Super-Block fully coded flags. */
GetNextSbInit(pbi);
for( SB = 0; SB < pbi->SuperBlocks; SB++) {
/* Skip blocks already marked as partially coded */
/* Note that this advances the outer loop index as well. */
while( (SB < pbi->SuperBlocks) && pbi->SBCodedFlags[SB] )
SB++;
if ( SB < pbi->SuperBlocks ) {
pbi->SBFullyFlags[SB] = GetNextSbBit (pbi);
if ( pbi->SBFullyFlags[SB] ) /* If SB is fully coded. */
pbi->SBCodedFlags[SB] = 1; /* Mark the SB as coded */
}
}
}
/* Scan through the list of coded super blocks. If at least one
is marked as partially coded then we have a block list to
decode. */
for ( SB=0; SB<pbi->SuperBlocks; SB++ ) {
if ( pbi->SBCodedFlags[SB] && !pbi->SBFullyFlags[SB] ) {
/* Initialise the block list decoder. */
GetNextBInit(pbi);
break;
}
}
}
/* Decode the block data from the bit stream. */
for ( SB=0; SB<pbi->SuperBlocks; SB++ ){
for ( MB=0; MB<4; MB++ ){
/* If MB is in the frame */
if ( QuadMapToMBTopLeft(pbi->BlockMap, SB,MB) >= 0 ){
/* Only read block level data if SB was fully or partially coded */
if ( pbi->SBCodedFlags[SB] ) {
for ( B=0; B<4; B++ ){
/* If block is valid (in frame)... */
dfIndex = QuadMapToIndex1( pbi->BlockMap, SB, MB, B );
if ( dfIndex >= 0 ){
/* Blocks of a fully coded SB are coded without reading a flag;
otherwise the flag comes from the block run-length list. */
if ( pbi->SBFullyFlags[SB] )
pbi->display_fragments[dfIndex] = 1;
else
pbi->display_fragments[dfIndex] = GetNextBBit(pbi);
/* Create linear list of coded block indices */
if ( pbi->display_fragments[dfIndex] ) {
pbi->MBCodedFlags[MBIndex] = 1;
pbi->CodedBlockList[pbi->CodedBlockIndex] = dfIndex;
pbi->CodedBlockIndex++;
}
}
}
}
/* Advanced for every in-frame macro-block, coded or not. */
MBIndex++;
}
}
}
}
/* Reads one coding-mode token from the bitstream and returns its entry
   number cast to CODING_MODE.

   Coding scheme:
    Token                      Codeword           Bits
    Entry   0 (most frequent)  0                   1
    Entry   1                  10                  2
    Entry   2                  110                 3
    Entry   3                  1110                4
    Entry   4                  11110               5
    Entry   5                  111110              6
    Entry   6                  1111110             7
    Entry   7                  1111111             7

   The bits read are also accumulated in pbi->bit_pattern. */
CODING_MODE FrArrayUnpackMode(PB_INSTANCE *pbi){
  long bit;
  int  entry;

  /* Initialise the decoding. */
  pbi->bits_so_far = 0;
  pbi->bit_pattern = 0;

  /* Read at most seven bits, stopping at the first complete codeword. */
  for ( entry = 0; entry < 7; entry++ ) {
    theora_read(pbi->opb,1,&bit);
    pbi->bit_pattern = (pbi->bit_pattern << 1) | bit;

    /* (2 << entry) - 2 is "entry" one bits followed by a single zero bit,
       i.e. the codeword of this entry. */
    if ( pbi->bit_pattern == (2 << entry) - 2 )
      return (CODING_MODE)entry;
  }

  /* Seven bits were read without matching any of entries 0-6. */
  return (CODING_MODE)7;
}

View File

@ -1,394 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: frinit.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include "encoder_internal.h"
/* Fills pbi->FragCoordinates with the x/y position of every fragment,
   measured in units of BLOCK_HEIGHT_WIDTH within the fragment's own plane.
   Y fragments come first, followed by the U fragments (starting at index
   YPlaneFragments) and the V fragments (starting at YPlaneFragments +
   UVPlaneFragments). */
void InitializeFragCoordinates(PB_INSTANCE *pbi){
  ogg_uint32_t row, col;
  ogg_uint32_t plane;
  ogg_uint32_t cols = pbi->HFragments;
  ogg_uint32_t rows = pbi->VFragments;

  /* Y: fragments are numbered in raster order starting at index 0. */
  for(row = 0; row < rows; row++){
    for(col = 0; col < cols; col++){
      ogg_uint32_t frag = row * cols + col;
      pbi->FragCoordinates[ frag ].x = col * BLOCK_HEIGHT_WIDTH;
      pbi->FragCoordinates[ frag ].y = row * BLOCK_HEIGHT_WIDTH;
    }
  }

  /* U (plane 0) then V (plane 1): half as many fragments in each
     direction. */
  cols >>= 1;
  rows >>= 1;
  for(plane = 0; plane < 2; plane++){
    ogg_uint32_t first = pbi->YPlaneFragments;
    if(plane == 1) first += pbi->UVPlaneFragments;

    for(row = 0; row < rows; row++){
      for(col = 0; col < cols; col++){
        ogg_uint32_t frag = first + row * cols + col;
        pbi->FragCoordinates[ frag ].x = col * BLOCK_HEIGHT_WIDTH;
        pbi->FragCoordinates[ frag ].y = row * BLOCK_HEIGHT_WIDTH;
      }
    }
  }
}
/* Rebuilds the two lookup tables that map a fragment number to the offset
   of its first pixel:
     pbi->pixel_index_table        offsets into the plain image buffers,
     pbi->recon_pixel_index_table  offsets into the reconstruction buffers
                                   (which use YStride/UVStride and the
                                   Recon*DataOffset start offsets). */
static void CalcPixelIndexTable( PB_INSTANCE *pbi){
  ogg_uint32_t  frag;
  ogg_uint32_t  count;
  ogg_uint32_t *table;

  /* Image buffers, Y fragments. */
  table = pbi->pixel_index_table;
  for ( frag = 0; frag < pbi->YPlaneFragments; frag++ ) {
    table[ frag ] =
      ((frag / pbi->HFragments) * VFRAGPIXELS * pbi->info.width) +
      ((frag % pbi->HFragments) * HFRAGPIXELS);
  }

  /* Image buffers, chroma fragments: a single loop covers
     (HFragments >> 1) * VFragments entries placed after the Y plane. */
  table = &pbi->pixel_index_table[pbi->YPlaneFragments];
  count = (pbi->HFragments >> 1) * pbi->VFragments;
  for ( frag = 0; frag < count; frag++ ) {
    table[ frag ] =
      ((frag / (pbi->HFragments / 2) ) *
       (VFRAGPIXELS * (pbi->info.width / 2)) ) +
      ((frag % (pbi->HFragments / 2) ) * HFRAGPIXELS) + pbi->YPlaneSize;
  }

  /************************************************************************/
  /* Now calculate the pixel index table for image reconstruction buffers */
  table = pbi->recon_pixel_index_table;
  for ( frag = 0; frag < pbi->YPlaneFragments; frag++ ){
    table[ frag ] =
      ((frag / pbi->HFragments) * VFRAGPIXELS * pbi->YStride) +
      ((frag % pbi->HFragments) * HFRAGPIXELS) + pbi->ReconYDataOffset;
  }

  /* U blocks */
  table = &pbi->recon_pixel_index_table[pbi->YPlaneFragments];
  for ( frag = 0; frag < pbi->UVPlaneFragments; frag++ ) {
    table[ frag ] =
      ((frag / (pbi->HFragments / 2) ) * (VFRAGPIXELS * (pbi->UVStride)) ) +
      ((frag % (pbi->HFragments / 2) ) * HFRAGPIXELS) + pbi->ReconUDataOffset;
  }

  /* V blocks */
  table = &pbi->recon_pixel_index_table[pbi->YPlaneFragments +
                                        pbi->UVPlaneFragments];
  for ( frag = 0; frag < pbi->UVPlaneFragments; frag++ ) {
    table[ frag ] =
      ((frag / (pbi->HFragments / 2) ) * (VFRAGPIXELS * (pbi->UVStride)) ) +
      ((frag % (pbi->HFragments / 2) ) * HFRAGPIXELS) + pbi->ReconVDataOffset;
  }
}
/* Releases every per-fragment, per-super-block and per-macro-block buffer
   owned by pbi and leaves each of those pointers set to 0.
   Pointers that are already 0 are skipped, so calling this on an instance
   that has been cleared before frees nothing. */
void ClearFragmentInfo(PB_INSTANCE * pbi){
  /* Per-fragment buffers. */
  if(pbi->display_fragments) _ogg_free(pbi->display_fragments);
  pbi->display_fragments = 0;
  if(pbi->pixel_index_table) _ogg_free(pbi->pixel_index_table);
  pbi->pixel_index_table = 0;
  if(pbi->recon_pixel_index_table) _ogg_free(pbi->recon_pixel_index_table);
  pbi->recon_pixel_index_table = 0;
  if(pbi->FragTokenCounts) _ogg_free(pbi->FragTokenCounts);
  pbi->FragTokenCounts = 0;
  if(pbi->CodedBlockList) _ogg_free(pbi->CodedBlockList);
  pbi->CodedBlockList = 0;
  if(pbi->FragMVect) _ogg_free(pbi->FragMVect);
  pbi->FragMVect = 0;
  if(pbi->FragCoeffs) _ogg_free(pbi->FragCoeffs);
  pbi->FragCoeffs = 0;
  if(pbi->FragCoefEOB) _ogg_free(pbi->FragCoefEOB);
  pbi->FragCoefEOB = 0;
  if(pbi->skipped_display_fragments) _ogg_free(pbi->skipped_display_fragments);
  pbi->skipped_display_fragments = 0;
  if(pbi->QFragData) _ogg_free(pbi->QFragData);
  pbi->QFragData = 0;
  if(pbi->TokenList) _ogg_free(pbi->TokenList);
  pbi->TokenList = 0;
  if(pbi->FragCodingMethod) _ogg_free(pbi->FragCodingMethod);
  pbi->FragCodingMethod = 0;
  if(pbi->FragCoordinates) _ogg_free(pbi->FragCoordinates);
  pbi->FragCoordinates = 0;
  if(pbi->FragQIndex) _ogg_free(pbi->FragQIndex);
  pbi->FragQIndex = 0;
  if(pbi->PPCoefBuffer) _ogg_free(pbi->PPCoefBuffer);
  pbi->PPCoefBuffer = 0;
  if(pbi->FragmentVariances) _ogg_free(pbi->FragmentVariances);
  pbi->FragmentVariances = 0;

  /* Block map and super-block flags. */
  if(pbi->BlockMap) _ogg_free(pbi->BlockMap);
  pbi->BlockMap = 0;
  if(pbi->SBCodedFlags) _ogg_free(pbi->SBCodedFlags);
  pbi->SBCodedFlags = 0;
  if(pbi->SBFullyFlags) _ogg_free(pbi->SBFullyFlags);
  pbi->SBFullyFlags = 0;

  /* Macro-block flags. */
  if(pbi->MBFullyFlags) _ogg_free(pbi->MBFullyFlags);
  pbi->MBFullyFlags = 0;
  if(pbi->MBCodedFlags) _ogg_free(pbi->MBCodedFlags);
  pbi->MBCodedFlags = 0;

  if(pbi->_Nodes) _ogg_free(pbi->_Nodes);
  pbi->_Nodes = 0;
}
/* Frees any existing fragment buffers and allocates fresh ones sized from
   pbi->UnitFragments, pbi->SuperBlocks and pbi->MacroBlocks, which must be
   set before the call.
   NOTE(review): none of the allocations is checked for failure here. */
void InitFragmentInfo(PB_INSTANCE * pbi){

  /* clear any existing info */
  ClearFragmentInfo(pbi);

  /* Buffers with one element per fragment. */
  pbi->display_fragments = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->display_fragments));
  pbi->pixel_index_table = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->pixel_index_table));
  pbi->recon_pixel_index_table = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->recon_pixel_index_table));
  pbi->FragTokenCounts = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragTokenCounts));
  pbi->CodedBlockList = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->CodedBlockList));
  pbi->FragMVect = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragMVect));
  pbi->FragCoeffs = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoeffs));
  pbi->FragCoefEOB = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoefEOB));
  pbi->skipped_display_fragments = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->skipped_display_fragments));
  pbi->QFragData = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->QFragData));
  pbi->TokenList = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->TokenList));
  pbi->FragCodingMethod = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCodingMethod));
  pbi->FragCoordinates = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoordinates));
  pbi->FragQIndex = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragQIndex));
  pbi->PPCoefBuffer = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->PPCoefBuffer));
  pbi->FragmentVariances = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragmentVariances));
  pbi->_Nodes = _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->_Nodes));

  /* Buffers with one element per super-block. */
  pbi->SBCodedFlags = _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBCodedFlags));
  pbi->SBFullyFlags = _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBFullyFlags));

  /* Buffers with one element per macro-block. */
  pbi->MBCodedFlags = _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBCodedFlags));
  pbi->MBFullyFlags = _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBFullyFlags));

  /* The block map also has one element per super-block. */
  pbi->BlockMap = _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->BlockMap));
}
/* Releases the four whole-frame buffers owned by pbi (current, golden and
   last reconstruction plus the post-processing buffer) and leaves each
   pointer set to 0. Pointers that are already 0 are skipped. */
void ClearFrameInfo(PB_INSTANCE * pbi){
  if(pbi->ThisFrameRecon) _ogg_free(pbi->ThisFrameRecon);
  pbi->ThisFrameRecon = 0;

  if(pbi->GoldenFrame) _ogg_free(pbi->GoldenFrame);
  pbi->GoldenFrame = 0;

  if(pbi->LastFrameRecon) _ogg_free(pbi->LastFrameRecon);
  pbi->LastFrameRecon = 0;

  if(pbi->PostProcessBuffer) _ogg_free(pbi->PostProcessBuffer);
  pbi->PostProcessBuffer = 0;
}
/* Frees any existing whole-frame buffers and allocates four new ones of
   FrameSize elements each.
   pbi:       The decoder instance that owns the buffers.
   FrameSize: Number of elements in each frame buffer.
   NOTE(review): the allocations are not checked for failure here. */
void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize){

  /* clear any existing info */
  ClearFrameInfo(pbi);

  /* allocate frames */
  pbi->ThisFrameRecon = _ogg_malloc(FrameSize*sizeof(*pbi->ThisFrameRecon));
  pbi->GoldenFrame = _ogg_malloc(FrameSize*sizeof(*pbi->GoldenFrame));
  pbi->LastFrameRecon = _ogg_malloc(FrameSize*sizeof(*pbi->LastFrameRecon));
  pbi->PostProcessBuffer = _ogg_malloc(FrameSize*sizeof(*pbi->PostProcessBuffer));
}
/* Derives the complete frame geometry from pbi->info.width and
pbi->info.height (plane sizes, fragment counts, strides, reconstruction
buffer offsets, super-block and macro-block counts), then reallocates the
fragment and frame buffers and rebuilds the fragment coordinates, the block
map and the pixel index tables.
NOTE(review): the chroma planes are sized as a quarter of the luma plane
throughout; presumably only 4:2:0 input is expected - confirm. */
void InitFrameDetails(PB_INSTANCE *pbi){
int FrameSize;
/*pbi->PostProcessingLevel = 0;
pbi->PostProcessingLevel = 4;
pbi->PostProcessingLevel = 5;
pbi->PostProcessingLevel = 6;*/
pbi->PostProcessingLevel = 0;
/* Set the frame size etc. */
pbi->YPlaneSize = pbi->info.width *
pbi->info.height;
pbi->UVPlaneSize = pbi->YPlaneSize / 4;
/* Integer division: any partial fragment at the right/bottom edge is not
counted.
NOTE(review): looks like width and height are expected to be multiples of
the fragment size - confirm against the caller. */
pbi->HFragments = pbi->info.width / HFRAGPIXELS;
pbi->VFragments = pbi->info.height / VFRAGPIXELS;
/* Luma fragments plus two chroma planes of a quarter of that each. */
pbi->UnitFragments = ((pbi->VFragments * pbi->HFragments)*3)/2;
pbi->YPlaneFragments = pbi->HFragments * pbi->VFragments;
pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
/* The reconstruction buffers are larger than the picture by STRIDE_EXTRA
in both directions. */
pbi->YStride = (pbi->info.width + STRIDE_EXTRA);
pbi->UVStride = pbi->YStride / 2;
pbi->ReconYPlaneSize = pbi->YStride *
(pbi->info.height + STRIDE_EXTRA);
pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
/* Plane start offsets inside a plain image buffer. */
pbi->YDataOffset = 0;
pbi->UDataOffset = pbi->YPlaneSize;
pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
/* Offsets of the first picture pixel of each plane inside a reconstruction
buffer: UMV_BORDER rows and columns are skipped for luma, half of that for
chroma. */
pbi->ReconYDataOffset =
(pbi->YStride * UMV_BORDER) + UMV_BORDER;
pbi->ReconUDataOffset = pbi->ReconYPlaneSize +
(pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize +
(pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
/* Image dimensions in Super-Blocks */
/* Each expression is a division by 32 rounded up. */
pbi->YSBRows = (pbi->info.height/32) +
( pbi->info.height%32 ? 1 : 0 );
pbi->YSBCols = (pbi->info.width/32) +
( pbi->info.width%32 ? 1 : 0 );
pbi->UVSBRows = ((pbi->info.height/2)/32) +
( (pbi->info.height/2)%32 ? 1 : 0 );
pbi->UVSBCols = ((pbi->info.width/2)/32) +
( (pbi->info.width/2)%32 ? 1 : 0 );
/* Super-Blocks per component */
pbi->YSuperBlocks = pbi->YSBRows * pbi->YSBCols;
pbi->UVSuperBlocks = pbi->UVSBRows * pbi->UVSBCols;
pbi->SuperBlocks = pbi->YSuperBlocks+2*pbi->UVSuperBlocks;
/* Useful externals */
/* Macro-block counts: fragment counts halved in each direction, rounded
up. */
pbi->YMacroBlocks = ((pbi->VFragments+1)/2)*((pbi->HFragments+1)/2);
pbi->UVMacroBlocks = ((pbi->VFragments/2+1)/2)*((pbi->HFragments/2+1)/2);
pbi->MacroBlocks = pbi->YMacroBlocks+2*pbi->UVMacroBlocks;
/* The allocations below rely on the counts computed above. */
InitFragmentInfo(pbi);
InitFrameInfo(pbi, FrameSize);
InitializeFragCoordinates(pbi);
/* Configure mapping between quad-tree and fragments */
CreateBlockMapping ( pbi->BlockMap, pbi->YSuperBlocks,
pbi->UVSuperBlocks, pbi->HFragments, pbi->VFragments);
/* Re-initialise the pixel index table. */
CalcPixelIndexTable( pbi );
}

View File

@ -0,0 +1,489 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <ogg/ogg.h>
#include "huffdec.h"
#include "decint.h"
/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).
This replacement yields the byte offset of _field within _type as a size_t,
computed from the address the member would have in an object located at
address 0.*/
#define _ogg_offsetof(_type,_field)\
((size_t)((char *)&((_type *)0)->_field-(char *)0))
/*The number of internal tokens associated with each of the spec tokens.
The 32 values are listed in the same token order as OC_DCT_TOKEN_MAP.
Comparing the two tables, tokens annotated there with up to 3 extra bits get
one internal token per extra-bit value (2, 4 or 8 entries), while tokens
with more extra bits get only 1 or 2 entries.*/
static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8
};
/*The map from external spec-defined tokens to internal tokens.
Each value is the first internal token index assigned to the spec token
named in the comment above it.
This is constructed so that any extra bits read with the original token value
can be masked off the least significant bits of its internal token index.
In addition, all of the tokens which require additional extra bits are placed
at the start of the list, and grouped by type.
OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
giving it index 0 may simplify comparisons on some architectures.
Meeting these requirements requires some substantial reordering.*/
static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
/*OC_DCT_EOB1_TOKEN (0 extra bits)*/
15,
/*OC_DCT_EOB2_TOKEN (0 extra bits)*/
16,
/*OC_DCT_EOB3_TOKEN (0 extra bits)*/
17,
/*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
88,
/*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
80,
/*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
1,
/*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
0,
/*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
48,
/*OC_DCT_ZRL_TOKEN (6 extra bits)*/
14,
/*OC_ONE_TOKEN (0 extra bits)*/
56,
/*OC_MINUS_ONE_TOKEN (0 extra bits)*/
57,
/*OC_TWO_TOKEN (0 extra bits)*/
58,
/*OC_MINUS_TWO_TOKEN (0 extra bits)*/
59,
/*OC_DCT_VAL_CAT2 (1 extra bit)*/
60,
62,
64,
66,
/*OC_DCT_VAL_CAT3 (2 extra bits)*/
68,
/*OC_DCT_VAL_CAT4 (3 extra bits)*/
72,
/*OC_DCT_VAL_CAT5 (4 extra bits)*/
2,
/*OC_DCT_VAL_CAT6 (5 extra bits)*/
4,
/*OC_DCT_VAL_CAT7 (6 extra bits)*/
6,
/*OC_DCT_VAL_CAT8 (10 extra bits)*/
8,
/*OC_DCT_RUN_CAT1A (1 extra bit)*/
18,
20,
22,
24,
26,
/*OC_DCT_RUN_CAT1B (3 extra bits)*/
32,
/*OC_DCT_RUN_CAT1C (4 extra bits)*/
12,
/*OC_DCT_RUN_CAT2A (2 extra bits)*/
28,
/*OC_DCT_RUN_CAT2B (3 extra bits)*/
40
};
/*The three bit-reader helpers below (oc_pack_refill(), oc_pack_look() and
   oc_pack_adv()) logically belong to the bitpack.c module, but this file is
   their only user.
  Keeping static copies here lets the compiler inline them, which saves
   considerable function call overhead.*/
/*Tops up the bit window of a pack buffer with whole bytes from the stream.
  The refilled window is returned but NOT stored back into _b->window; only
   _b->ptr and _b->bits are updated here.
  _b:    The pack buffer to refill.
  _bits: The number of bits the caller wants to have available.
  Return: The refilled window.*/
static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
  const unsigned char *cur=_b->ptr;
  const unsigned char *end=_b->stop;
  oc_pb_window         win=_b->window;
  int                  nbits=_b->bits;
  /*This version of _refill() doesn't bother setting eof because we won't
     check for it after we've started decoding DCT tokens.
    Once the input is exhausted we simply pretend there are plenty of bits.*/
  if(cur>=end)nbits=OC_LOTS_OF_BITS;
  /*Shift in one byte at a time for as long as a whole byte still fits.*/
  while(nbits<=OC_PB_WINDOW_SIZE-8){
    oc_pb_window byte;
    byte=(oc_pb_window)*cur;
    cur++;
    nbits+=8;
    win|=byte<<(OC_PB_WINDOW_SIZE-nbits);
    if(cur>=end)nbits=OC_LOTS_OF_BITS;
  }
  _b->ptr=cur;
  _b->bits=nbits;
  /*If the request is still not satisfied, borrow the leading bits of the next
     byte without consuming it.*/
  if(_bits>nbits)win|=*cur>>(nbits&7);
  return win;
}
/*Peeks at the next _bits bits of the stream without consuming them.
  The window is refilled (and stored back into _b) first when it holds fewer
   than _bits bits.
  Here we assume 0<=_bits&&_bits<=32.
  _b:    The pack buffer to read from.
  _bits: The number of bits to look at.
  Return: The requested bits, taken from the top of the window.*/
static long oc_pack_look(oc_pack_buf *_b,int _bits){
  oc_pb_window win;
  if(_bits==0)return 0;
  win=_b->window;
  if(_bits>_b->bits){
    win=oc_pack_refill(_b,_bits);
    _b->window=win;
  }
  return (long)(win>>(OC_PB_WINDOW_SIZE-_bits));
}
/*Consumes _bits bits from the pack buffer.
  _b:    The pack buffer to advance.
  _bits: The number of bits to discard from the front of the window.*/
static void oc_pack_adv(oc_pack_buf *_b,int _bits){
  /*The special cases for _bits==0 and _bits==32 are ignored here, since they
     are never actually used.
    OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
     32 bits in a single go, and would require a 32 GB lookup table (assuming
     8 byte pointers, since 4 byte pointers couldn't fit such a table).*/
  _b->bits-=_bits;
  _b->window<<=_bits;
}
/*The amount by which the log_2 of the size of a lookup table is allowed to
   grow relative to the number of unique nodes it contains.
  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
   wasted (each node will have an amortized cost of at most 20 bytes when using
   4-byte pointers).
  Larger numbers can decode tokens with fewer read operations, while smaller
   numbers may save more space (requiring as little as 8 bytes amortized per
   node, though there will be more nodes).
  With a sample file:
  32233473 read calls are required when no tree collapsing is done (100.0%).
  19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
  11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
  10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
  10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
  Since a value of 1 gets us the vast majority of the speed-up with only a
   small amount of wasted memory, this is what we use.*/
#define OC_HUFF_SLUSH (1)
/*Computes how many bytes a single Huffman tree node occupies.
  The fixed part of the node ends where its nodes table begins; the table
   itself is only as long as the node actually needs.
  _nbits: The depth of the subtree the node represents.
          0 means a leaf node, which has no child table.
          Otherwise room for 1<<_nbits child pointers is included.
  Return: The number of bytes required to store the node.*/
static size_t oc_huff_node_size(int _nbits){
  if(_nbits<=0)return _ogg_offsetof(oc_huff_node,nodes);
  return _ogg_offsetof(oc_huff_node,nodes)+sizeof(oc_huff_node *)*(1<<_nbits);
}
/*Carves a new node out of a caller-provided storage buffer.
  Only the nbits field of the new node is set here; the remaining fields are
   left for the caller to fill in.
  _storage: Points at the next free byte of the buffer.
            It is advanced by _size bytes.
  _size:    The number of bytes to reserve for the node.
  _nbits:   The value to store in the node's nbits field.
  Return: The new node, located where *_storage pointed on entry.*/
static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){
  oc_huff_node *node;
  node=(oc_huff_node *)*_storage;
  *_storage+=_size;
  node->nbits=(unsigned char)_nbits;
  return node;
}
/*Determines the size in bytes of a Huffman tree.
_nbits: The depth of the subtree.
If this is 0, the node is a leaf node.
Otherwise storage for 1<<_nbits pointers are added for children.
Return: The number of bytes required to store the tree.*/
static size_t oc_huff_tree_size(const oc_huff_node *_node){
size_t size;
size=oc_huff_node_size(_node->nbits);
if(_node->nbits){
int nchildren;
int i;
nchildren=1<<_node->nbits;
for(i=0;i<nchildren;i+=1<<_node->nbits-_node->nodes[i]->depth){
size+=oc_huff_tree_size(_node->nodes[i]);
}
}
return size;
}
/*Unpacks a sub-tree from the given buffer into a flat array of binary nodes.
  A 0 bit in the stream introduces an internal node, followed by its two
   children; a 1 bit introduces a leaf, followed by an external token value of
   OC_NDCT_TOKEN_BITS bits.
  Each external leaf is expanded into a complete binary sub-tree whose leaves
   are the corresponding internal tokens.
  _opb: The buffer to unpack from.
  _binodes: The nodes to store the sub-tree in.
            The root of the sub-tree is always stored in _binodes[0].
  _nbinodes: The number of nodes available for the sub-tree.
  Return: The number of entries of _binodes used by the sub-tree (at least 1)
           on success, or a negative value on error.*/
static int oc_huff_tree_unpack(oc_pack_buf *_opb,
oc_huff_node *_binodes,int _nbinodes){
oc_huff_node *binode;
long bits;
int nused;
if(_nbinodes<1)return TH_EBADHEADER;
binode=_binodes;
nused=0;
bits=oc_pack_read1(_opb);
if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
/*Read an internal node:*/
if(!bits){
int ret;
/*The node itself takes the first slot; its two sub-trees follow it
   back-to-back in the array.*/
nused++;
binode->nbits=1;
binode->depth=1;
binode->nodes[0]=_binodes+nused;
ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
if(ret>=0){
nused+=ret;
binode->nodes[1]=_binodes+nused;
ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
}
/*This catches a failure of either recursive call.*/
if(ret<0)return ret;
nused+=ret;
}
/*Read a leaf node:*/
else{
int ntokens;
int token;
int i;
bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
/*Find out how many internal tokens we translate this external token into.*/
ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
/*A complete binary tree with ntokens leaves needs 2*ntokens-1 nodes.*/
if(_nbinodes<2*ntokens-1)return TH_EBADHEADER;
/*Fill in a complete binary tree pointing to the internal tokens.
  The tree is laid out level by level: the i nodes of the current level are
   immediately followed by the 2*i nodes of the next one.*/
for(i=1;i<ntokens;i<<=1){
int j;
binode=_binodes+nused;
nused+=i;
for(j=0;j<i;j++){
binode[j].nbits=1;
binode[j].depth=1;
binode[j].nodes[0]=_binodes+nused+2*j;
binode[j].nodes[1]=_binodes+nused+2*j+1;
}
}
/*And now the leaf nodes with those tokens.
  The internal token indices are consecutive, starting at the mapped value.*/
token=OC_DCT_TOKEN_MAP[bits];
for(i=0;i<ntokens;i++){
binode=_binodes+nused++;
binode->nbits=0;
binode->depth=1;
binode->token=token+i;
}
}
return nused;
}
/*Measures the length of the shortest root-to-leaf path of a binary sub-tree.
  _binode: The root of the sub-tree to examine.
           _binode->nbits must be 0 or 1.
  Return: The smallest depth at which a leaf occurs in this sub-tree.
          0 means that _binode is itself a leaf.*/
static int oc_huff_tree_mindepth(oc_huff_node *_binode){
  int left;
  int right;
  if(_binode->nbits==0)return 0;
  left=oc_huff_tree_mindepth(_binode->nodes[0]);
  right=oc_huff_tree_mindepth(_binode->nodes[1]);
  return 1+OC_MINI(left,right);
}
/*Counts the distinct entries a jump table of the given depth would contain:
   every internal node found exactly _depth levels down, plus every leaf found
   at that level or above it.
  _binode: The root of the sub-tree to examine.
           The tree must be binary, i.e., _binode->nbits must be 0 or 1.
  _depth:  The depth of the prospective jump table.
  Return: The number of entries that would be contained in a jump table of the
           given depth.*/
static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
  int count;
  /*Both a leaf and a node sitting at the target depth count exactly once.*/
  if(_depth<=0||_binode->nbits==0)return 1;
  _depth--;
  count=oc_huff_tree_occupancy(_binode->nodes[0],_depth);
  count+=oc_huff_tree_occupancy(_binode->nodes[1],_depth);
  return count;
}
/*Duplicates a Huffman tree into a caller-provided storage buffer.
  The copy keeps the shape of the original, including the duplicated table
   entries of children whose depth is smaller than their parent's nbits.
  _node:    The root of the Huffman tree to copy.
  _storage: Points at the next free byte of the buffer.
            It is advanced past every node that is written.
  Return: The root of the copy, located where *_storage pointed on entry.*/
static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node,
 char **_storage){
  oc_huff_node *copy;
  int           nbits;
  int           nchildren;
  int           i;
  nbits=_node->nbits;
  copy=oc_huff_node_init(_storage,oc_huff_node_size(nbits),nbits);
  copy->depth=_node->depth;
  /*Only leaves carry a token.*/
  if(nbits==0){
    copy->token=_node->token;
    return copy;
  }
  nchildren=1<<nbits;
  i=0;
  while(i<nchildren){
    oc_huff_node *child;
    int           span;
    int           j;
    child=oc_huff_tree_copy(_node->nodes[i],_storage);
    /*A child that is shallower than the table fills several adjacent slots.*/
    span=1<<(nbits-child->depth);
    for(j=0;j<span;j++)copy->nodes[i+j]=child;
    i+=span;
  }
  return copy;
}
/*Computes the number of bytes needed to store the collapsed form of a binary
   sub-tree.
  oc_huff_trees_unpack() uses the result to size the buffer that
   oc_huff_tree_collapse() then writes into.
  _binode: The root of the binary sub-tree.
  _depth:  The number of levels to descend before sizing.
           The nodes found that far down (or leaves met on the way) are each
            sized as the root of a collapsed table.
  Return: The number of bytes required.*/
static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
  size_t total;
  int    depth;
  int    prev;
  int    cur;
  /*Not yet at the requested level: just add up both halves.*/
  if(_binode->nbits!=0&&_depth>0){
    total=oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1);
    total+=oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
    return total;
  }
  /*Pick the table depth with the same rule oc_huff_tree_collapse() uses:
     keep deepening while that adds entries and the table stays full enough
     for OC_HUFF_SLUSH.*/
  depth=oc_huff_tree_mindepth(_binode);
  cur=1<<depth;
  for(;;){
    prev=cur;
    depth++;
    cur=oc_huff_tree_occupancy(_binode,depth);
    if(cur<=prev)break;
    if(cur<(1<<OC_MAXI(depth-OC_HUFF_SLUSH,0)))break;
  }
  /*The last depth tried was one too many.*/
  depth--;
  total=oc_huff_node_size(depth);
  if(depth>0){
    total+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1);
    total+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1);
  }
  return total;
}
/*Forward declaration: oc_huff_node_fill() and oc_huff_tree_collapse() call
   each other.*/
static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
char **_storage);
/*Fills the given nodes table with all the children in the sub-tree at the
   given depth.
  The nodes in the sub-tree with a depth less than that stored in the table
   are not reproduced in the output; nothing is freed here.
  The sub-tree must be binary and complete up until the given depth.
  _nodes: The nodes table to fill.
  _binode: The root of the sub-tree to fill it with.
           _binode->nbits must be 0 or 1.
           Its depth field is overwritten when it is stored in the table.
  _level: The current level in the table.
          0 indicates that the current node should be stored, regardless of
           whether it is a leaf node or an internal node.
  _depth: The depth of the nodes to fill the table with, relative to their
           parent.
  _storage: Points at the next free byte of the output buffer.
            It is advanced as collapsed nodes are written.*/
static void oc_huff_node_fill(oc_huff_node **_nodes,
oc_huff_node *_binode,int _level,int _depth,char **_storage){
if(_level<=0||_binode->nbits==0){
int i;
/*Record how many bits it really takes to reach this node; a leaf found
   above the bottom level has a depth smaller than the table's.*/
_binode->depth=(unsigned char)(_depth-_level);
_nodes[0]=oc_huff_tree_collapse(_binode,_storage);
/*Such a leaf owns every table slot that shares its prefix.*/
for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
}
else{
_level--;
/*The 0 branch fills the first half of this part of the table, the 1 branch
   the second half.*/
oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage);
_nodes+=1<<_level;
oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage);
}
}
/*Finds the largest complete sub-tree rooted at the current node and collapses
   it into a single node.
  This procedure is then applied recursively to all the children of that node.
  "Complete" is relaxed by OC_HUFF_SLUSH: a table of a given depth is accepted
   as long as the number of distinct entries in it is at least
   1<<(depth-OC_HUFF_SLUSH).
  _binode: The root of the sub-tree to collapse.
           _binode->nbits must be 0 or 1.
  _storage: Points at the next free byte of the output buffer.
            It is advanced past every node that is written.
            The returned root is the first node written.
  Return: The new root of the collapsed sub-tree.*/
static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
char **_storage){
oc_huff_node *root;
size_t size;
int mindepth;
int depth;
int loccupancy;
int occupancy;
/*Down to mindepth the sub-tree is complete, so a table of that depth has
   1<<mindepth entries.*/
depth=mindepth=oc_huff_tree_mindepth(_binode);
occupancy=1<<mindepth;
/*Keep deepening the table while doing so adds new entries and the table
   remains full enough.*/
do{
loccupancy=occupancy;
occupancy=oc_huff_tree_occupancy(_binode,++depth);
}
while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
/*The loop stops one level past the last acceptable depth.*/
depth--;
/*There is nothing to gain from a table of depth 0 or 1; copy the sub-tree
   unchanged instead.*/
if(depth<=1)return oc_huff_tree_copy(_binode,_storage);
size=oc_huff_node_size(depth);
root=oc_huff_node_init(_storage,size,depth);
root->depth=_binode->depth;
oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
return root;
}
/*Reads TH_NHUFFMAN_TABLES Huffman trees from a buffer and stores each of them
   in collapsed form.
  Every collapsed tree lives in a single allocation whose address is the
   pointer stored in _nodes.
  On error, trees already stored in _nodes by earlier iterations are not freed
   here.
  _opb:   The buffer to unpack the trees from.
  _nodes: The table to fill with the Huffman trees.
  Return: 0 on success, or a negative value on error.*/
int oc_huff_trees_unpack(oc_pack_buf *_opb,
 oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
  int ti;
  ti=0;
  while(ti<TH_NHUFFMAN_TABLES){
    /*Scratch space for the uncollapsed binary tree.
      NOTE(review): 511 equals 2*256-1; presumably the node count of a binary
       tree with 32*8 leaves -- confirm.*/
    oc_huff_node  binodes[511];
    char         *arena;
    int           nused;
    /*First read the whole tree in its plain binary form.*/
    nused=oc_huff_tree_unpack(_opb,binodes,sizeof(binodes)/sizeof(*binodes));
    if(nused<0)return nused;
    /*Then allocate exactly as much as its collapsed form needs.*/
    arena=(char *)_ogg_calloc(1,oc_huff_tree_collapse_size(binodes,0));
    if(arena==NULL)return TH_EFAULT;
    /*Finally build the collapsed tree inside that allocation.*/
    _nodes[ti]=oc_huff_tree_collapse(binodes,&arena);
    ti++;
  }
  return 0;
}
/*Duplicates a complete set of Huffman trees.
  Each copy is placed in its own allocation.
  If an allocation fails, the copies made so far are freed again.
  NOTE(review): the freed entries of _dst keep their stale pointer values on
   failure, so the caller must not free them a second time.
  _dst: The array to store the copies in.
  _src: The array of trees to copy.
  Return: 0 on success, or TH_EFAULT if memory could not be allocated.*/
int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
 const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
  int ti;
  for(ti=0;ti<TH_NHUFFMAN_TABLES;ti++){
    char *arena;
    arena=(char *)_ogg_calloc(1,oc_huff_tree_size(_src[ti]));
    if(arena==NULL)break;
    _dst[ti]=oc_huff_tree_copy(_src[ti],&arena);
  }
  if(ti>=TH_NHUFFMAN_TABLES)return 0;
  /*Undo the partial work, newest copy first.*/
  while(ti>0){
    ti--;
    _ogg_free(_dst[ti]);
  }
  return TH_EFAULT;
}
/*Releases every tree in a set of Huffman trees.
  Each entry is passed to _ogg_free(); the entries themselves are left
   unchanged.
  _nodes: The array of trees to free.*/
void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
  oc_huff_node **cur;
  oc_huff_node **end;
  end=_nodes+TH_NHUFFMAN_TABLES;
  for(cur=_nodes;cur<end;cur++)_ogg_free(*cur);
}
/*Decodes one token from the bitstream by walking a collapsed Huffman tree.
  At every internal node, nbits bits are peeked and used to index its table;
   the stream is then advanced by the depth of the child that was reached,
   which may be fewer bits than were peeked.
  _opb:  The buffer to unpack the token from.
  _node: The root of the tree to unpack the token with.
  Return: The token value stored in the leaf that was reached.*/
int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node){
  int nbits;
  for(nbits=_node->nbits;nbits!=0;nbits=_node->nbits){
    const oc_huff_node *next;
    next=_node->nodes[oc_pack_look(_opb,nbits)];
    oc_pack_adv(_opb,next->depth);
    _node=next;
  }
  return _node->token;
}

View File

@ -0,0 +1,92 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_huffdec_H)
# define _huffdec_H (1)
# include "huffman.h"
# include "bitpack.h"
typedef struct oc_huff_node oc_huff_node;
/*A node in the Huffman tree.
  Instead of storing every branching in the tree, subtrees can be collapsed
   into one node, with a table of size 1<<nbits pointing directly to its
   descendants nbits levels down.
  This allows more than one bit to be read at a time, and avoids following all
   the intermediate branches with next to no increased code complexity once
   the collapsed tree has been built.
  We do _not_ require that a subtree be complete to be collapsed, but instead
   store duplicate pointers in the table, and record the actual depth of the
   node below its parent.
  This tells us the number of bits to advance the stream after reaching it.
  This turns out to be equivalent to the method described in \cite{Hash95},
   without the requirement that codewords be sorted by length.
  If the codewords were sorted by length (so-called ``canonical-codes''), they
   could be decoded much faster via either Lindell and Moffat's approach or
   Hashemian's Condensed Huffman Code approach, the latter of which has an
   extremely small memory footprint.
  We can't use Choueka et al.'s finite state machine approach, which is
   extremely fast, because we can't allow multiple symbols to be output at a
   time; the codebook can and does change between symbols.
  It also has very large memory requirements, which impairs cache coherency.
  @ARTICLE{Hash95,
   author="Reza Hashemian",
   title="Memory Efficient and High-Speed Search {Huffman} Coding",
   journal="{IEEE} Transactions on Communications",
   volume=43,
   number=10,
   pages="2576--2581",
   month=Oct,
   year=1995
  }*/
struct oc_huff_node{
/*The number of bits of the code needed to descend through this node.
  0 indicates a leaf node.
  Otherwise there are 1<<nbits nodes in the nodes table, which can be
   indexed by reading nbits bits from the stream.*/
unsigned char nbits;
/*The value of a token stored in a leaf node.
  The value in non-leaf nodes is undefined.*/
unsigned char token;
/*The depth of the current node, relative to its parent in the collapsed
   tree.
  This can be less than its parent's nbits value, in which case there are
   1<<(nbits-depth) copies of this node in the parent's table, and the
   bitstream should only be advanced depth bits after reaching this node.*/
unsigned char depth;
/*The table of child nodes.
  The ACTUAL size of this array is 1<<nbits, despite what the declaration
   below claims.
  The exception is that for leaf nodes the size is 0.
  This must remain the last member: nodes are allocated with only as many
   table entries as they need.*/
oc_huff_node *nodes[2];
};
/*Unpacks TH_NHUFFMAN_TABLES Huffman trees from _opb and stores them in _nodes
   in collapsed form.
  Returns 0 on success, or a negative value on error.*/
int oc_huff_trees_unpack(oc_pack_buf *_opb,
oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
/*Copies the set of trees in _src into freshly allocated trees in _dst.
  Returns 0 on success, or a negative value if memory could not be
   allocated.*/
int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
/*Frees the memory used by every tree in _nodes.*/
void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
/*Decodes and returns a single token from _opb using the tree rooted at
   _node.*/
int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node);
#endif

View File

@ -0,0 +1,910 @@
#include <stdlib.h>
#include <string.h>
#include <ogg/ogg.h>
#include "huffenc.h"
/*The default Huffman codes used for VP3.1.*/
const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={
{
{0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8},
{0x02CE,10},{0x059E,11},{0x027D,11},{0x0008, 5},
{0x04F9,12},{0x000F, 4},{0x000E, 4},{0x001B, 5},
{0x0006, 4},{0x0008, 4},{0x0005, 4},{0x001A, 5},
{0x0015, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
{0x0000, 3},{0x0009, 4},{0x0017, 5},{0x0029, 6},
{0x0028, 6},{0x00B2, 8},{0x04F8,12},{0x059F,11},
{0x009E, 9},{0x013F,10},{0x0012, 6},{0x0058, 7}
},
{
{0x0010, 5},{0x0047, 7},{0x01FF, 9},{0x008C, 8},
{0x03FC,10},{0x046A,11},{0x0469,11},{0x0022, 6},
{0x11A1,13},{0x000E, 4},{0x000D, 4},{0x0004, 4},
{0x0005, 4},{0x0009, 4},{0x0006, 4},{0x001E, 5},
{0x0016, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
{0x0000, 3},{0x000A, 4},{0x0017, 5},{0x007D, 7},
{0x007E, 7},{0x011B, 9},{0x08D1,12},{0x03FD,10},
{0x046B,11},{0x11A0,13},{0x007C, 7},{0x00FE, 8}
},
{
{0x0016, 5},{0x0020, 6},{0x0086, 8},{0x0087, 8},
{0x0367,10},{0x06CC,11},{0x06CB,11},{0x006E, 7},
{0x366D,14},{0x000F, 4},{0x000E, 4},{0x0004, 4},
{0x0005, 4},{0x000A, 4},{0x0006, 4},{0x001A, 5},
{0x0011, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
{0x0000, 3},{0x0009, 4},{0x0017, 5},{0x006F, 7},
{0x006D, 7},{0x0364,10},{0x0D9A,12},{0x06CA,11},
{0x1B37,13},{0x366C,14},{0x0042, 7},{0x00D8, 8}
},
{
{0x0000, 4},{0x002D, 6},{0x00F7, 8},{0x0058, 7},
{0x0167, 9},{0x02CB,10},{0x02CA,10},{0x000E, 6},
{0x1661,13},{0x0003, 3},{0x0002, 3},{0x0008, 4},
{0x0009, 4},{0x000D, 4},{0x0002, 4},{0x001F, 5},
{0x0017, 5},{0x0001, 4},{0x000C, 4},{0x000E, 4},
{0x000A, 4},{0x0006, 5},{0x0078, 7},{0x000F, 6},
{0x007A, 7},{0x0164, 9},{0x0599,11},{0x02CD,10},
{0x0B31,12},{0x1660,13},{0x0079, 7},{0x00F6, 8}
},
{
{0x0003, 4},{0x003C, 6},{0x000F, 7},{0x007A, 7},
{0x001D, 8},{0x0020, 9},{0x0072,10},{0x0006, 6},
{0x0399,13},{0x0004, 3},{0x0005, 3},{0x0005, 4},
{0x0006, 4},{0x000E, 4},{0x0004, 4},{0x0000, 4},
{0x0019, 5},{0x0002, 4},{0x000D, 4},{0x0007, 4},
{0x001F, 5},{0x0030, 6},{0x0011, 8},{0x0031, 6},
{0x0005, 6},{0x0021, 9},{0x00E7,11},{0x0038, 9},
{0x01CD,12},{0x0398,13},{0x007B, 7},{0x0009, 7}
},
{
{0x0009, 4},{0x0002, 5},{0x0074, 7},{0x0007, 6},
{0x00EC, 8},{0x00D1, 9},{0x01A6,10},{0x0006, 6},
{0x0D21,13},{0x0005, 3},{0x0006, 3},{0x0008, 4},
{0x0007, 4},{0x000F, 4},{0x0004, 4},{0x0000, 4},
{0x001C, 5},{0x0002, 4},{0x0005, 4},{0x0003, 4},
{0x000C, 5},{0x0035, 7},{0x01A7,10},{0x001B, 6},
{0x0077, 7},{0x01A5,10},{0x0349,11},{0x00D0, 9},
{0x0691,12},{0x0D20,13},{0x0075, 7},{0x00ED, 8}
},
{
{0x000A, 4},{0x000C, 5},{0x0012, 6},{0x001B, 6},
{0x00B7, 8},{0x016C, 9},{0x0099, 9},{0x005A, 7},
{0x16D8,13},{0x0007, 3},{0x0006, 3},{0x0009, 4},
{0x0008, 4},{0x0000, 3},{0x0005, 4},{0x0017, 5},
{0x000E, 5},{0x0002, 4},{0x0003, 4},{0x000F, 5},
{0x001A, 6},{0x004D, 8},{0x2DB3,14},{0x002C, 6},
{0x0011, 6},{0x02DA,10},{0x05B7,11},{0x0098, 9},
{0x0B6D,12},{0x2DB2,14},{0x0010, 6},{0x0027, 7}
},
{
{0x000D, 4},{0x000F, 5},{0x001D, 6},{0x0008, 5},
{0x0051, 7},{0x0056, 8},{0x00AF, 9},{0x002A, 7},
{0x148A,13},{0x0007, 3},{0x0000, 2},{0x0008, 4},
{0x0009, 4},{0x000C, 4},{0x0006, 4},{0x0017, 5},
{0x000B, 5},{0x0016, 5},{0x0015, 5},{0x0009, 5},
{0x0050, 7},{0x00AE, 9},{0x2917,14},{0x001C, 6},
{0x0014, 6},{0x0290,10},{0x0523,11},{0x0149, 9},
{0x0A44,12},{0x2916,14},{0x0053, 7},{0x00A5, 8}
},
{
{0x0001, 4},{0x001D, 6},{0x00F5, 8},{0x00F4, 8},
{0x024D,10},{0x0499,11},{0x0498,11},{0x0001, 5},
{0x0021, 6},{0x0006, 3},{0x0005, 3},{0x0006, 4},
{0x0005, 4},{0x0002, 4},{0x0007, 5},{0x0025, 6},
{0x007B, 7},{0x001C, 6},{0x0020, 6},{0x000D, 6},
{0x0048, 7},{0x0092, 8},{0x0127, 9},{0x000E, 4},
{0x0004, 4},{0x0011, 5},{0x000C, 6},{0x003C, 6},
{0x000F, 5},{0x0000, 5},{0x001F, 5},{0x0013, 5}
},
{
{0x0005, 4},{0x003C, 6},{0x0040, 7},{0x000D, 7},
{0x0031, 9},{0x0061,10},{0x0060,10},{0x0002, 5},
{0x00F5, 8},{0x0006, 3},{0x0005, 3},{0x0007, 4},
{0x0006, 4},{0x0002, 4},{0x0009, 5},{0x0025, 6},
{0x0007, 6},{0x0021, 6},{0x0024, 6},{0x0010, 6},
{0x0041, 7},{0x00F4, 8},{0x0019, 8},{0x000E, 4},
{0x0003, 4},{0x0011, 5},{0x0011, 6},{0x003F, 6},
{0x003E, 6},{0x007B, 7},{0x0000, 4},{0x0013, 5}
},
{
{0x000A, 4},{0x0007, 5},{0x0001, 6},{0x0009, 6},
{0x0131, 9},{0x0261,10},{0x0260,10},{0x0015, 6},
{0x0001, 7},{0x0007, 3},{0x0006, 3},{0x0008, 4},
{0x0007, 4},{0x0006, 4},{0x0012, 5},{0x002F, 6},
{0x0014, 6},{0x0027, 6},{0x002D, 6},{0x0016, 6},
{0x004D, 7},{0x0099, 8},{0x0000, 7},{0x0004, 4},
{0x0001, 4},{0x0005, 5},{0x0017, 6},{0x002E, 6},
{0x002C, 6},{0x0008, 6},{0x0006, 5},{0x0001, 5}
},
{
{0x0000, 3},{0x000E, 5},{0x0017, 6},{0x002A, 6},
{0x0010, 7},{0x00F9,10},{0x00F8,10},{0x001E, 7},
{0x003F, 8},{0x0007, 3},{0x0006, 3},{0x0009, 4},
{0x0008, 4},{0x0006, 4},{0x000F, 5},{0x0005, 5},
{0x0016, 6},{0x0029, 6},{0x002B, 6},{0x0015, 6},
{0x0050, 7},{0x0011, 7},{0x007D, 9},{0x0004, 4},
{0x0017, 5},{0x0006, 5},{0x0014, 6},{0x002C, 6},
{0x002D, 6},{0x000E, 6},{0x0009, 6},{0x0051, 7}
},
{
{0x0002, 3},{0x0018, 5},{0x002F, 6},{0x000D, 5},
{0x0053, 7},{0x0295,10},{0x0294,10},{0x00A4, 8},
{0x007C, 8},{0x0000, 2},{0x0007, 3},{0x0009, 4},
{0x0008, 4},{0x001B, 5},{0x000C, 5},{0x0028, 6},
{0x006A, 7},{0x001E, 6},{0x001D, 6},{0x0069, 7},
{0x00D7, 8},{0x007D, 8},{0x014B, 9},{0x0019, 5},
{0x0016, 5},{0x002E, 6},{0x001C, 6},{0x002B, 6},
{0x002A, 6},{0x0068, 7},{0x003F, 7},{0x00D6, 8}
},
{
{0x0002, 3},{0x001B, 5},{0x000C, 5},{0x0018, 5},
{0x0029, 6},{0x007F, 8},{0x02F0,10},{0x0198, 9},
{0x0179, 9},{0x0000, 2},{0x0007, 3},{0x0009, 4},
{0x0008, 4},{0x001A, 5},{0x000D, 5},{0x002A, 6},
{0x0064, 7},{0x001E, 6},{0x0067, 7},{0x005F, 7},
{0x00CD, 8},{0x007E, 8},{0x02F1,10},{0x0016, 5},
{0x000E, 5},{0x002E, 6},{0x0065, 7},{0x002B, 6},
{0x0028, 6},{0x003E, 7},{0x00BD, 8},{0x0199, 9}
},
{
{0x0002, 3},{0x0007, 4},{0x0016, 5},{0x0006, 4},
{0x0036, 6},{0x005C, 7},{0x015D, 9},{0x015C, 9},
{0x02BF,10},{0x0000, 2},{0x0007, 3},{0x0009, 4},
{0x0008, 4},{0x0018, 5},{0x0034, 6},{0x002A, 6},
{0x005E, 7},{0x006A, 7},{0x0064, 7},{0x005D, 7},
{0x00CB, 8},{0x00AD, 8},{0x02BE,10},{0x0014, 5},
{0x0033, 6},{0x006E, 7},{0x005F, 7},{0x006F, 7},
{0x006B, 7},{0x00CA, 8},{0x00AC, 8},{0x015E, 9}
},
{
{0x000F, 4},{0x001D, 5},{0x0018, 5},{0x000B, 4},
{0x0019, 5},{0x0029, 6},{0x00D6, 8},{0x0551,11},
{0x0AA1,12},{0x0001, 2},{0x0000, 2},{0x0009, 4},
{0x0008, 4},{0x001B, 5},{0x0038, 6},{0x0028, 6},
{0x0057, 7},{0x006A, 7},{0x0068, 7},{0x0056, 7},
{0x00E5, 8},{0x0155, 9},{0x0AA0,12},{0x0073, 7},
{0x0069, 7},{0x00D7, 8},{0x00AB, 8},{0x00E4, 8},
{0x00A9, 8},{0x0151, 9},{0x0150, 9},{0x02A9,10}
},
{
{0x0008, 5},{0x0025, 7},{0x017A, 9},{0x02F7,10},
{0x0BDB,12},{0x17B4,13},{0x2F6B,14},{0x001D, 5},
{0x2F6A,14},{0x0008, 4},{0x0007, 4},{0x0001, 4},
{0x0002, 4},{0x000A, 4},{0x0006, 4},{0x0000, 4},
{0x001C, 5},{0x0009, 4},{0x000D, 4},{0x000F, 4},
{0x000C, 4},{0x0003, 4},{0x000A, 5},{0x0016, 5},
{0x0013, 6},{0x005D, 7},{0x0024, 7},{0x00BC, 8},
{0x005C, 7},{0x05EC,11},{0x000B, 5},{0x005F, 7}
},
{
{0x000F, 5},{0x0010, 6},{0x004B, 8},{0x00C6, 8},
{0x031D,10},{0x0C71,12},{0x0C70,12},{0x0001, 4},
{0x0C73,12},{0x0008, 4},{0x0009, 4},{0x0002, 4},
{0x0003, 4},{0x000B, 4},{0x0006, 4},{0x0000, 4},
{0x001C, 5},{0x0005, 4},{0x000D, 4},{0x000F, 4},
{0x000A, 4},{0x0019, 5},{0x0013, 6},{0x001D, 5},
{0x0030, 6},{0x0062, 7},{0x0024, 7},{0x004A, 8},
{0x018F, 9},{0x0C72,12},{0x000E, 5},{0x0011, 6}
},
{
{0x001B, 5},{0x0003, 6},{0x008D, 8},{0x0040, 7},
{0x0239,10},{0x0471,11},{0x08E0,12},{0x0003, 4},
{0x11C3,13},{0x000A, 4},{0x0009, 4},{0x0004, 4},
{0x0005, 4},{0x000E, 4},{0x0007, 4},{0x0001, 4},
{0x001E, 5},{0x0006, 4},{0x000C, 4},{0x000B, 4},
{0x0002, 4},{0x0000, 5},{0x0041, 7},{0x001F, 5},
{0x0022, 6},{0x0002, 6},{0x008F, 8},{0x008C, 8},
{0x011D, 9},{0x11C2,13},{0x001A, 5},{0x0021, 6}
},
{
{0x001F, 5},{0x0003, 6},{0x0003, 7},{0x0043, 7},
{0x000B, 9},{0x0015,10},{0x0051,12},{0x0003, 4},
{0x0050,12},{0x000D, 4},{0x000C, 4},{0x0004, 4},
{0x0006, 4},{0x000E, 4},{0x000A, 4},{0x0001, 4},
{0x001E, 5},{0x0005, 4},{0x0009, 4},{0x0007, 4},
{0x0011, 5},{0x0002, 6},{0x0004, 8},{0x0002, 4},
{0x002D, 6},{0x0020, 6},{0x0042, 7},{0x0001, 7},
{0x0000, 7},{0x0029,11},{0x0017, 5},{0x002C, 6}
},
{
{0x0003, 4},{0x001F, 6},{0x003A, 7},{0x005D, 7},
{0x0173, 9},{0x02E4,10},{0x172D,13},{0x0004, 4},
{0x172C,13},{0x000F, 4},{0x000E, 4},{0x0009, 4},
{0x0008, 4},{0x000C, 4},{0x000A, 4},{0x0001, 4},
{0x0016, 5},{0x0002, 4},{0x0005, 4},{0x001A, 5},
{0x002F, 6},{0x0038, 7},{0x05CA,11},{0x0006, 4},
{0x0037, 6},{0x001E, 6},{0x003B, 7},{0x0039, 7},
{0x00B8, 8},{0x0B97,12},{0x0000, 4},{0x0036, 6}
},
{
{0x0006, 4},{0x0037, 6},{0x005D, 7},{0x000C, 6},
{0x00B9, 8},{0x02E3,10},{0x05C4,11},{0x0004, 4},
{0x1715,13},{0x0000, 3},{0x000F, 4},{0x0008, 4},
{0x0007, 4},{0x000C, 4},{0x0009, 4},{0x001D, 5},
{0x0016, 5},{0x001C, 5},{0x001A, 5},{0x000B, 5},
{0x005E, 7},{0x0170, 9},{0x1714,13},{0x000A, 4},
{0x000A, 5},{0x0036, 6},{0x005F, 7},{0x001B, 7},
{0x001A, 7},{0x0B8B,12},{0x0002, 4},{0x0007, 5}
},
{
{0x000C, 4},{0x000B, 5},{0x0079, 7},{0x0022, 6},
{0x00F0, 8},{0x0119, 9},{0x0230,10},{0x001D, 5},
{0x08C4,12},{0x0001, 3},{0x0000, 3},{0x000A, 4},
{0x0009, 4},{0x000B, 4},{0x0007, 4},{0x001C, 5},
{0x003D, 6},{0x000D, 5},{0x0008, 5},{0x0015, 6},
{0x008D, 8},{0x118B,13},{0x118A,13},{0x000D, 4},
{0x0010, 5},{0x0009, 5},{0x0014, 6},{0x0047, 7},
{0x00F1, 8},{0x0463,11},{0x001F, 5},{0x000C, 5}
},
{
{0x0000, 3},{0x001A, 5},{0x0033, 6},{0x000C, 5},
{0x0046, 7},{0x01E3, 9},{0x03C5,10},{0x0017, 5},
{0x1E21,13},{0x0002, 3},{0x0001, 3},{0x0009, 4},
{0x000A, 4},{0x0007, 4},{0x001B, 5},{0x003D, 6},
{0x001B, 6},{0x0022, 6},{0x0079, 7},{0x00F0, 8},
{0x1E20,13},{0x1E23,13},{0x1E22,13},{0x000E, 4},
{0x0016, 5},{0x0018, 5},{0x0032, 6},{0x001A, 6},
{0x0047, 7},{0x0789,11},{0x001F, 5},{0x0010, 5}
},
{
{0x001D, 5},{0x0061, 7},{0x004E, 8},{0x009E, 9},
{0x027C,11},{0x09F5,13},{0x09F4,13},{0x0003, 4},
{0x0060, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4},
{0x000A, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5},
{0x0031, 6},{0x0008, 5},{0x0038, 6},{0x0012, 6},
{0x0026, 7},{0x013F,10},{0x04FB,12},{0x000D, 4},
{0x0002, 4},{0x000C, 5},{0x0039, 6},{0x001C, 6},
{0x000F, 5},{0x001D, 6},{0x0008, 4},{0x0019, 5}
},
{
{0x0007, 4},{0x0019, 6},{0x00AB, 8},{0x00AA, 8},
{0x0119,10},{0x0461,12},{0x0460,12},{0x001B, 5},
{0x0047, 8},{0x0001, 3},{0x0000, 3},{0x000C, 4},
{0x000B, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5},
{0x0035, 6},{0x003D, 6},{0x003C, 6},{0x0018, 6},
{0x0022, 7},{0x008D, 9},{0x0231,11},{0x000E, 4},
{0x001F, 5},{0x0009, 5},{0x002B, 6},{0x0010, 6},
{0x0034, 6},{0x0054, 7},{0x0008, 4},{0x0014, 5}
},
{
{0x000C, 4},{0x0005, 5},{0x0008, 6},{0x005B, 7},
{0x004D, 9},{0x0131,11},{0x0261,12},{0x001A, 5},
{0x0012, 7},{0x0000, 3},{0x000F, 4},{0x000A, 4},
{0x0009, 4},{0x0006, 4},{0x001B, 5},{0x0006, 5},
{0x001C, 6},{0x002C, 6},{0x0015, 6},{0x005A, 7},
{0x0027, 8},{0x0099,10},{0x0260,12},{0x000E, 4},
{0x0004, 4},{0x000F, 5},{0x0007, 5},{0x001D, 6},
{0x000B, 5},{0x0014, 6},{0x0008, 4},{0x0017, 5}
},
{
{0x000F, 4},{0x0013, 5},{0x0075, 7},{0x0024, 6},
{0x0095, 8},{0x0251,10},{0x04A0,11},{0x0010, 5},
{0x00C8, 8},{0x0002, 3},{0x0001, 3},{0x0001, 4},
{0x0000, 4},{0x001A, 5},{0x0011, 5},{0x002C, 6},
{0x0065, 7},{0x0074, 7},{0x004B, 7},{0x00C9, 8},
{0x0129, 9},{0x0943,12},{0x0942,12},{0x0003, 3},
{0x000A, 4},{0x001C, 5},{0x0018, 5},{0x0033, 6},
{0x0017, 5},{0x002D, 6},{0x001B, 5},{0x003B, 6}
},
{
{0x0003, 3},{0x001A, 5},{0x002D, 6},{0x0038, 6},
{0x0028, 7},{0x0395,10},{0x0E51,12},{0x0037, 6},
{0x00E4, 8},{0x0001, 3},{0x0000, 3},{0x001F, 5},
{0x001E, 5},{0x0017, 5},{0x003A, 6},{0x0073, 7},
{0x002A, 7},{0x002B, 7},{0x0029, 7},{0x01CB, 9},
{0x0729,11},{0x1CA1,13},{0x1CA0,13},{0x0004, 3},
{0x000A, 4},{0x0004, 4},{0x0018, 5},{0x0036, 6},
{0x000B, 5},{0x002C, 6},{0x0019, 5},{0x003B, 6}
},
{
{0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0017, 5},
{0x0075, 7},{0x01F5, 9},{0x07D1,11},{0x0017, 6},
{0x01F6, 9},{0x0001, 3},{0x0000, 3},{0x001B, 5},
{0x001A, 5},{0x000A, 5},{0x0032, 6},{0x0074, 7},
{0x00F8, 8},{0x00F9, 8},{0x01F7, 9},{0x03E9,10},
{0x0FA0,12},{0x1F43,13},{0x1F42,13},{0x0003, 3},
{0x000A, 4},{0x001E, 5},{0x001C, 5},{0x003B, 6},
{0x0018, 5},{0x0016, 6},{0x0016, 5},{0x0033, 6}
},
{
{0x0004, 3},{0x0007, 4},{0x0018, 5},{0x001E, 5},
{0x0036, 6},{0x0031, 7},{0x0177, 9},{0x0077, 7},
{0x0176, 9},{0x0001, 3},{0x0000, 3},{0x001A, 5},
{0x0019, 5},{0x003A, 6},{0x0019, 6},{0x005C, 7},
{0x00BA, 8},{0x0061, 8},{0x00C1, 9},{0x0180,10},
{0x0302,11},{0x0607,12},{0x0606,12},{0x0002, 3},
{0x000A, 4},{0x001F, 5},{0x001C, 5},{0x0037, 6},
{0x0016, 5},{0x0076, 7},{0x000D, 5},{0x002F, 6}
},
{
{0x0000, 3},{0x000A, 4},{0x001A, 5},{0x000C, 4},
{0x001D, 5},{0x0039, 6},{0x0078, 7},{0x005E, 7},
{0x0393,11},{0x0002, 3},{0x0001, 3},{0x0016, 5},
{0x000F, 5},{0x002E, 6},{0x005F, 7},{0x0073, 8},
{0x00E5, 9},{0x01C8,10},{0x0E4A,13},{0x1C97,14},
{0x1C96,14},{0x0E49,13},{0x0E48,13},{0x0004, 3},
{0x0006, 4},{0x001F, 5},{0x001B, 5},{0x001D, 6},
{0x0038, 6},{0x0038, 7},{0x003D, 6},{0x0079, 7}
},
{
{0x000B, 5},{0x002B, 7},{0x0054, 8},{0x01B7, 9},
{0x06D9,11},{0x0DB1,12},{0x0DB0,12},{0x0002, 4},
{0x00AB, 9},{0x0009, 4},{0x000A, 4},{0x0007, 4},
{0x0008, 4},{0x000F, 4},{0x000C, 4},{0x0003, 4},
{0x001D, 5},{0x0004, 4},{0x000B, 4},{0x0006, 4},
{0x001A, 5},{0x0003, 6},{0x00AA, 9},{0x0001, 4},
{0x0000, 5},{0x0014, 6},{0x006C, 7},{0x00DA, 8},
{0x0002, 6},{0x036D,10},{0x001C, 5},{0x0037, 6}
},
{
{0x001D, 5},{0x0004, 6},{0x00B6, 8},{0x006A, 8},
{0x05B9,11},{0x16E1,13},{0x16E0,13},{0x0007, 4},
{0x016F, 9},{0x000C, 4},{0x000D, 4},{0x0009, 4},
{0x0008, 4},{0x000F, 4},{0x000A, 4},{0x0003, 4},
{0x0017, 5},{0x0002, 4},{0x0004, 4},{0x001C, 5},
{0x002C, 6},{0x006B, 8},{0x0B71,12},{0x0005, 4},
{0x0003, 5},{0x001B, 6},{0x005A, 7},{0x0034, 7},
{0x0005, 6},{0x02DD,10},{0x0000, 4},{0x000C, 5}
},
{
{0x0003, 4},{0x007F, 7},{0x00A1, 8},{0x00A0, 8},
{0x020C,10},{0x0834,12},{0x106B,13},{0x0007, 4},
{0x0082, 8},{0x000E, 4},{0x000D, 4},{0x000B, 4},
{0x000C, 4},{0x0000, 3},{0x0009, 4},{0x0002, 4},
{0x0011, 5},{0x001E, 5},{0x0015, 5},{0x003E, 6},
{0x0040, 7},{0x041B,11},{0x106A,13},{0x0006, 4},
{0x000A, 5},{0x0029, 6},{0x007E, 7},{0x0051, 7},
{0x0021, 6},{0x0107, 9},{0x0004, 4},{0x000B, 5}
},
{
{0x0007, 4},{0x001B, 6},{0x00F6, 8},{0x00E9, 8},
{0x03A1,10},{0x0740,11},{0x0E82,12},{0x001F, 5},
{0x01EF, 9},{0x0001, 3},{0x0002, 3},{0x000B, 4},
{0x000C, 4},{0x000D, 4},{0x0008, 4},{0x001C, 5},
{0x0003, 5},{0x0012, 5},{0x0002, 5},{0x0075, 7},
{0x01D1, 9},{0x1D07,13},{0x1D06,13},{0x000A, 4},
{0x0013, 5},{0x003B, 6},{0x001A, 6},{0x007A, 7},
{0x003C, 6},{0x01EE, 9},{0x0000, 4},{0x000C, 5}
},
{
{0x000D, 4},{0x003D, 6},{0x0042, 7},{0x0037, 7},
{0x00D9, 9},{0x0362,11},{0x06C6,12},{0x001F, 5},
{0x0086, 8},{0x0001, 3},{0x0002, 3},{0x000C, 4},
{0x000B, 4},{0x000A, 4},{0x0001, 4},{0x000F, 5},
{0x0025, 6},{0x003C, 6},{0x001A, 6},{0x0087, 8},
{0x01B0,10},{0x0D8F,13},{0x0D8E,13},{0x000E, 4},
{0x0013, 5},{0x000C, 5},{0x0024, 6},{0x0020, 6},
{0x0011, 5},{0x006D, 8},{0x0000, 4},{0x000E, 5}
},
{
{0x0000, 3},{0x0012, 5},{0x0076, 7},{0x0077, 7},
{0x014D, 9},{0x0533,11},{0x14C9,13},{0x0013, 5},
{0x00A5, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4},
{0x000C, 4},{0x0008, 4},{0x001A, 5},{0x002B, 6},
{0x0075, 7},{0x0074, 7},{0x00A7, 8},{0x0298,10},
{0x14C8,13},{0x14CB,13},{0x14CA,13},{0x000F, 4},
{0x001C, 5},{0x0007, 5},{0x002A, 6},{0x0028, 6},
{0x001B, 5},{0x00A4, 8},{0x0002, 4},{0x0006, 5}
},
{
{0x0002, 3},{0x001A, 5},{0x002B, 6},{0x003A, 6},
{0x00ED, 8},{0x0283,10},{0x0A0A,12},{0x0004, 5},
{0x00A1, 8},{0x0004, 3},{0x0003, 3},{0x000B, 4},
{0x000C, 4},{0x001F, 5},{0x0006, 5},{0x0077, 7},
{0x00A3, 8},{0x00A2, 8},{0x0140, 9},{0x1417,13},
{0x1416,13},{0x0A09,12},{0x0A08,12},{0x0000, 3},
{0x001E, 5},{0x0007, 5},{0x002A, 6},{0x0029, 6},
{0x001C, 5},{0x00EC, 8},{0x001B, 5},{0x0005, 5}
},
{
{0x0002, 3},{0x0002, 4},{0x0018, 5},{0x001D, 5},
{0x0035, 6},{0x00E4, 8},{0x01CF,11},{0x001D, 7},
{0x0072, 9},{0x0004, 3},{0x0005, 3},{0x0006, 4},
{0x0007, 4},{0x0006, 5},{0x0073, 7},{0x0038, 8},
{0x01CE,11},{0x039B,12},{0x0398,12},{0x0733,13},
{0x0732,13},{0x0735,13},{0x0734,13},{0x0000, 3},
{0x001F, 5},{0x001B, 5},{0x0034, 6},{0x000F, 6},
{0x001E, 5},{0x00E5, 8},{0x0019, 5},{0x0038, 6}
},
{
{0x0016, 5},{0x0050, 7},{0x0172, 9},{0x02E7,10},
{0x1732,13},{0x2E67,14},{0x2E66,14},{0x0006, 4},
{0x0051, 7},{0x0001, 3},{0x0000, 3},{0x000D, 4},
{0x000C, 4},{0x0009, 4},{0x001C, 5},{0x0009, 5},
{0x001C, 6},{0x001D, 6},{0x005D, 7},{0x00B8, 8},
{0x05CD,11},{0x1731,13},{0x1730,13},{0x000F, 4},
{0x0005, 4},{0x000F, 5},{0x0008, 5},{0x0029, 6},
{0x001D, 5},{0x002F, 6},{0x0008, 4},{0x0015, 5}
},
{
{0x0009, 4},{0x0021, 6},{0x0040, 7},{0x00AD, 8},
{0x02B0,10},{0x1589,13},{0x1588,13},{0x001C, 5},
{0x005F, 7},{0x0000, 3},{0x000F, 4},{0x000D, 4},
{0x000C, 4},{0x0006, 4},{0x0011, 5},{0x002A, 6},
{0x0057, 7},{0x005E, 7},{0x0041, 7},{0x0159, 9},
{0x0563,11},{0x158B,13},{0x158A,13},{0x0001, 3},
{0x0005, 4},{0x0014, 5},{0x003B, 6},{0x002E, 6},
{0x0004, 4},{0x003A, 6},{0x0007, 4},{0x0016, 5}
},
{
{0x000E, 4},{0x0007, 5},{0x0046, 7},{0x0045, 7},
{0x0064, 9},{0x032A,12},{0x0657,13},{0x0018, 5},
{0x000D, 6},{0x0000, 3},{0x000F, 4},{0x000A, 4},
{0x000B, 4},{0x001A, 5},{0x0036, 6},{0x0047, 7},
{0x0044, 7},{0x0018, 7},{0x0033, 8},{0x00CB,10},
{0x0656,13},{0x0329,12},{0x0328,12},{0x0002, 3},
{0x0006, 4},{0x0019, 5},{0x000E, 5},{0x0037, 6},
{0x0009, 4},{0x000F, 5},{0x0002, 4},{0x0010, 5}
},
{
{0x0003, 3},{0x0018, 5},{0x0023, 6},{0x0077, 7},
{0x0194, 9},{0x1956,13},{0x32AF,14},{0x003A, 6},
{0x0076, 7},{0x0002, 3},{0x0001, 3},{0x001F, 5},
{0x001E, 5},{0x0014, 5},{0x0022, 6},{0x0064, 7},
{0x0197, 9},{0x0196, 9},{0x032B,10},{0x0654,11},
{0x32AE,14},{0x1955,13},{0x1954,13},{0x0000, 3},
{0x0009, 4},{0x001C, 5},{0x0015, 5},{0x0010, 5},
{0x000D, 4},{0x0017, 5},{0x0016, 5},{0x0033, 6}
},
{
{0x0005, 3},{0x0006, 4},{0x003E, 6},{0x0010, 5},
{0x0048, 7},{0x093F,12},{0x24FA,14},{0x0032, 6},
{0x0067, 7},{0x0002, 3},{0x0001, 3},{0x001B, 5},
{0x001E, 5},{0x0034, 6},{0x0066, 7},{0x0092, 8},
{0x0126, 9},{0x024E,10},{0x049E,11},{0x49F7,15},
{0x49F6,15},{0x24F9,14},{0x24F8,14},{0x0000, 3},
{0x0007, 4},{0x0018, 5},{0x0011, 5},{0x003F, 6},
{0x000E, 4},{0x0013, 5},{0x0035, 6},{0x0025, 6}
},
{
{0x0005, 3},{0x0008, 4},{0x0012, 5},{0x001C, 5},
{0x001C, 6},{0x00EA, 9},{0x1D75,14},{0x001E, 6},
{0x0066, 7},{0x0001, 3},{0x0002, 3},{0x001B, 5},
{0x001A, 5},{0x001F, 6},{0x003B, 7},{0x0074, 8},
{0x01D6,10},{0x03AF,11},{0x1D74,14},{0x1D77,14},
{0x1D76,14},{0x0EB9,13},{0x0EB8,13},{0x000F, 4},
{0x0006, 4},{0x0013, 5},{0x003B, 6},{0x003A, 6},
{0x0000, 3},{0x0018, 5},{0x0032, 6},{0x0067, 7}
},
{
{0x0004, 3},{0x000A, 4},{0x001B, 5},{0x000C, 4},
{0x000D, 5},{0x00E6, 8},{0x0684,11},{0x0072, 7},
{0x00E7, 8},{0x0002, 3},{0x0001, 3},{0x0017, 5},
{0x0016, 5},{0x0018, 6},{0x00D1, 8},{0x01A0, 9},
{0x0686,11},{0x0D0F,12},{0x0D0A,12},{0x1A17,13},
{0x1A16,13},{0x1A1D,13},{0x1A1C,13},{0x000F, 4},
{0x001D, 5},{0x000E, 5},{0x0035, 6},{0x0038, 6},
{0x0000, 3},{0x000F, 5},{0x0019, 6},{0x0069, 7}
},
{
{0x0003, 3},{0x000C, 4},{0x001B, 5},{0x0000, 3},
{0x0003, 4},{0x002E, 6},{0x0051, 9},{0x00BC, 8},
{0x0053, 9},{0x0004, 3},{0x0002, 3},{0x0016, 5},
{0x0015, 5},{0x0015, 7},{0x0050, 9},{0x00A4,10},
{0x0294,12},{0x052B,13},{0x052A,13},{0x052D,13},
{0x052C,13},{0x052F,13},{0x052E,13},{0x000E, 4},
{0x001A, 5},{0x0004, 5},{0x0028, 6},{0x0029, 6},
{0x000F, 4},{0x000B, 6},{0x005F, 7},{0x00BD, 8}
},
{
{0x0003, 4},{0x0009, 6},{0x00D0, 8},{0x01A3, 9},
{0x0344,10},{0x0D14,12},{0x1A2B,13},{0x0004, 4},
{0x0015, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4},
{0x000C, 4},{0x000E, 4},{0x0009, 4},{0x001B, 5},
{0x000A, 5},{0x0014, 5},{0x000D, 5},{0x002A, 6},
{0x0014, 7},{0x068B,11},{0x1A2A,13},{0x0008, 4},
{0x000B, 5},{0x002B, 6},{0x000B, 6},{0x0069, 7},
{0x0035, 6},{0x0008, 6},{0x0007, 4},{0x000C, 5}
},
{
{0x000A, 4},{0x003C, 6},{0x0032, 7},{0x0030, 7},
{0x00C5, 9},{0x0621,12},{0x0620,12},{0x001F, 5},
{0x0033, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4},
{0x000D, 4},{0x000C, 4},{0x0004, 4},{0x000D, 5},
{0x0026, 6},{0x0027, 6},{0x0014, 6},{0x0063, 8},
{0x0189,10},{0x0623,12},{0x0622,12},{0x000B, 4},
{0x0012, 5},{0x003D, 6},{0x0022, 6},{0x0015, 6},
{0x000B, 5},{0x0023, 6},{0x0007, 4},{0x0010, 5}
},
{
{0x000F, 4},{0x000C, 5},{0x0043, 7},{0x0010, 6},
{0x0044, 8},{0x0114,10},{0x0455,12},{0x0018, 5},
{0x0023, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4},
{0x000D, 4},{0x0009, 4},{0x0019, 5},{0x0009, 5},
{0x0017, 6},{0x0016, 6},{0x0042, 7},{0x008B, 9},
{0x0454,12},{0x0457,12},{0x0456,12},{0x000B, 4},
{0x0015, 5},{0x000A, 5},{0x0029, 6},{0x0020, 6},
{0x000D, 5},{0x0028, 6},{0x0007, 4},{0x0011, 5}
},
{
{0x0001, 3},{0x001A, 5},{0x0029, 6},{0x002A, 6},
{0x00A0, 8},{0x0285,10},{0x1425,13},{0x0002, 5},
{0x0000, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4},
{0x000B, 4},{0x0008, 4},{0x0012, 5},{0x0001, 6},
{0x0051, 7},{0x0001, 7},{0x0143, 9},{0x0508,11},
{0x1424,13},{0x1427,13},{0x1426,13},{0x000F, 4},
{0x001C, 5},{0x0003, 5},{0x0037, 6},{0x002B, 6},
{0x0013, 5},{0x0036, 6},{0x001D, 5},{0x0001, 5}
},
{
{0x0004, 3},{0x001F, 5},{0x003D, 6},{0x0006, 5},
{0x0016, 7},{0x0053, 9},{0x014A,11},{0x0034, 6},
{0x002A, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4},
{0x000C, 4},{0x001C, 5},{0x0037, 6},{0x0017, 7},
{0x002B, 8},{0x0028, 8},{0x00A4,10},{0x052D,13},
{0x052C,13},{0x052F,13},{0x052E,13},{0x0000, 3},
{0x001D, 5},{0x0007, 5},{0x0004, 5},{0x0035, 6},
{0x0014, 5},{0x0036, 6},{0x0015, 5},{0x003C, 6}
},
{
{0x0004, 3},{0x000A, 4},{0x0007, 5},{0x001D, 5},
{0x0009, 6},{0x01F3, 9},{0x07C7,11},{0x0008, 6},
{0x01F0, 9},{0x0003, 3},{0x0002, 3},{0x000D, 4},
{0x000C, 4},{0x0017, 5},{0x007D, 7},{0x01F2, 9},
{0x07C6,11},{0x07C5,11},{0x1F12,13},{0x3E27,14},
{0x3E26,14},{0x1F11,13},{0x1F10,13},{0x0000, 3},
{0x001E, 5},{0x0006, 5},{0x0039, 6},{0x0038, 6},
{0x003F, 6},{0x002C, 6},{0x0005, 5},{0x002D, 6}
},
{
{0x0002, 3},{0x0007, 4},{0x0018, 5},{0x0003, 4},
{0x0005, 5},{0x0035, 7},{0x004F, 9},{0x0012, 7},
{0x04E5,13},{0x0005, 3},{0x0004, 3},{0x000D, 4},
{0x000E, 4},{0x0033, 6},{0x0026, 8},{0x009D,10},
{0x04E4,13},{0x04E7,13},{0x04E6,13},{0x04E1,13},
{0x04E0,13},{0x04E3,13},{0x04E2,13},{0x0000, 3},
{0x001F, 5},{0x000C, 5},{0x003D, 6},{0x003C, 6},
{0x0032, 6},{0x0034, 7},{0x001B, 6},{0x0008, 6}
},
{
{0x0000, 3},{0x0004, 4},{0x001C, 5},{0x000F, 4},
{0x0002, 4},{0x0007, 5},{0x0075, 7},{0x00E8, 8},
{0x1D2A,13},{0x0005, 3},{0x0004, 3},{0x000D, 4},
{0x000C, 4},{0x0077, 7},{0x0E96,12},{0x3A57,14},
{0x3A56,14},{0x3A5D,14},{0x3A5C,14},{0x3A5F,14},
{0x3A5E,14},{0x1D29,13},{0x1D28,13},{0x0003, 3},
{0x0006, 5},{0x000A, 5},{0x002C, 7},{0x0017, 6},
{0x0076, 7},{0x01D3, 9},{0x03A4,10},{0x002D, 7}
},
{
{0x000A, 4},{0x0024, 6},{0x00BF, 8},{0x0085, 8},
{0x0211,10},{0x0842,12},{0x1087,13},{0x0018, 5},
{0x0020, 6},{0x0001, 3},{0x0002, 3},{0x000E, 4},
{0x000D, 4},{0x0007, 4},{0x0013, 5},{0x0025, 6},
{0x005E, 7},{0x0043, 7},{0x00BE, 8},{0x0109, 9},
{0x1086,13},{0x0841,12},{0x0840,12},{0x000F, 4},
{0x0001, 4},{0x0011, 5},{0x0000, 5},{0x002E, 6},
{0x0019, 5},{0x0001, 5},{0x0006, 4},{0x0016, 5}
},
{
{0x0002, 3},{0x000F, 5},{0x006F, 7},{0x0061, 7},
{0x0374,10},{0x1BA8,13},{0x3753,14},{0x0012, 5},
{0x0036, 6},{0x0000, 3},{0x0001, 3},{0x000A, 4},
{0x000B, 4},{0x001A, 5},{0x0031, 6},{0x0060, 7},
{0x00DC, 8},{0x01BB, 9},{0x06EB,11},{0x1BAB,13},
{0x3752,14},{0x3755,14},{0x3754,14},{0x000E, 4},
{0x0006, 4},{0x0013, 5},{0x000E, 5},{0x003E, 6},
{0x0008, 4},{0x001E, 5},{0x0019, 5},{0x003F, 6}
},
{
{0x0003, 3},{0x001C, 5},{0x0025, 6},{0x0024, 6},
{0x01DA, 9},{0x1DBD,13},{0x3B7C,14},{0x003C, 6},
{0x003D, 6},{0x0000, 3},{0x0001, 3},{0x000B, 4},
{0x000A, 4},{0x000B, 5},{0x0077, 7},{0x00EC, 8},
{0x03B6,10},{0x076E,11},{0x1DBF,13},{0x76FB,15},
{0x76FA,15},{0x3B79,14},{0x3B78,14},{0x000D, 4},
{0x001F, 5},{0x0013, 5},{0x000A, 5},{0x0008, 5},
{0x000C, 4},{0x0008, 4},{0x0009, 5},{0x003A, 6}
},
{
{0x0005, 3},{0x0003, 4},{0x0004, 5},{0x0010, 5},
{0x008F, 8},{0x0475,11},{0x11D1,13},{0x0079, 7},
{0x0027, 6},{0x0002, 3},{0x0003, 3},{0x0001, 4},
{0x0000, 4},{0x0026, 6},{0x0046, 7},{0x011C, 9},
{0x0477,11},{0x08ED,12},{0x11D0,13},{0x11D3,13},
{0x11D2,13},{0x11D9,13},{0x11D8,13},{0x000D, 4},
{0x001F, 5},{0x0012, 5},{0x0005, 5},{0x003D, 6},
{0x000C, 4},{0x000E, 4},{0x0022, 6},{0x0078, 7}
},
{
{0x0005, 3},{0x000C, 4},{0x001B, 5},{0x0000, 4},
{0x0006, 6},{0x03E2,10},{0x3E3D,14},{0x000F, 7},
{0x0034, 6},{0x0003, 3},{0x0002, 3},{0x001E, 5},
{0x001D, 5},{0x007D, 7},{0x01F0, 9},{0x07C6,11},
{0x3E3C,14},{0x3E3F,14},{0x3E3E,14},{0x3E39,14},
{0x3E38,14},{0x3E3B,14},{0x3E3A,14},{0x0008, 4},
{0x001C, 5},{0x0002, 5},{0x003F, 6},{0x0035, 6},
{0x0009, 4},{0x0001, 3},{0x000E, 7},{0x00F9, 8}
},
{
{0x0004, 3},{0x000B, 4},{0x0001, 4},{0x000A, 4},
{0x001E, 6},{0x00E0, 9},{0x0E1E,13},{0x0071, 8},
{0x0039, 7},{0x0007, 3},{0x0006, 3},{0x000D, 5},
{0x000C, 5},{0x0020, 7},{0x01C2,10},{0x1C3F,14},
{0x1C3E,14},{0x0E19,13},{0x0E18,13},{0x0E1B,13},
{0x0E1A,13},{0x0E1D,13},{0x0E1C,13},{0x0000, 4},
{0x0009, 5},{0x001D, 6},{0x001F, 6},{0x0011, 6},
{0x0005, 4},{0x0001, 3},{0x0043, 8},{0x0042, 8}
},
{
{0x0004, 3},{0x000D, 4},{0x0007, 4},{0x0002, 3},
{0x0014, 5},{0x016C, 9},{0x16D1,13},{0x02DF,10},
{0x016E, 9},{0x0000, 2},{0x0007, 3},{0x002C, 6},
{0x002B, 6},{0x02DE,10},{0x16D0,13},{0x16D3,13},
{0x16D2,13},{0x2DB5,14},{0x2DB4,14},{0x2DB7,14},
{0x2DB6,14},{0x16D9,13},{0x16D8,13},{0x000C, 5},
{0x002A, 6},{0x005A, 7},{0x001B, 6},{0x001A, 6},
{0x0017, 5},{0x000C, 4},{0x05B7,11},{0x05B5,11}
},
{
{0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4},
{0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14},
{0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7},
{0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14},
{0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14},
{0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9},
{0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8},
{0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14}
},
{
{0x0000, 3},{0x0010, 5},{0x0072, 7},{0x0071, 7},
{0x0154, 9},{0x0AAB,12},{0x0AA8,12},{0x0014, 5},
{0x0070, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4},
{0x000B, 4},{0x0003, 4},{0x0011, 5},{0x0073, 7},
{0x0054, 7},{0x00AB, 8},{0x02AB,10},{0x1553,13},
{0x1552,13},{0x1555,13},{0x1554,13},{0x000D, 4},
{0x001E, 5},{0x0012, 5},{0x003E, 6},{0x002B, 6},
{0x0002, 4},{0x003F, 6},{0x001D, 5},{0x0013, 5}
},
{
{0x0003, 3},{0x001F, 5},{0x0029, 6},{0x003D, 6},
{0x000C, 7},{0x0069,10},{0x0345,13},{0x0002, 5},
{0x0028, 6},{0x0002, 3},{0x0001, 3},{0x000E, 4},
{0x000C, 4},{0x0015, 5},{0x0007, 6},{0x001B, 8},
{0x006B,10},{0x006A,10},{0x0344,13},{0x0347,13},
{0x0346,13},{0x01A1,12},{0x01A0,12},{0x000B, 4},
{0x001A, 5},{0x0012, 5},{0x0000, 5},{0x003C, 6},
{0x0008, 4},{0x001B, 5},{0x0013, 5},{0x0001, 5}
},
{
{0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0014, 5},
{0x0056, 7},{0x015C, 9},{0x15D5,13},{0x003C, 6},
{0x002A, 6},{0x0000, 3},{0x0001, 3},{0x000E, 4},
{0x000D, 4},{0x000C, 5},{0x00AF, 8},{0x02BB,10},
{0x15D4,13},{0x15D7,13},{0x15D6,13},{0x15D1,13},
{0x15D0,13},{0x15D3,13},{0x15D2,13},{0x000B, 4},
{0x0019, 5},{0x000D, 5},{0x003E, 6},{0x0031, 6},
{0x0007, 4},{0x0005, 4},{0x003D, 6},{0x0030, 6}
},
{
{0x0005, 3},{0x0008, 4},{0x001A, 5},{0x0000, 4},
{0x0036, 6},{0x0011, 8},{0x0106,12},{0x000A, 7},
{0x006E, 7},{0x0002, 3},{0x0003, 3},{0x0003, 4},
{0x0002, 4},{0x006F, 7},{0x0021, 9},{0x020F,13},
{0x020E,13},{0x0101,12},{0x0100,12},{0x0103,12},
{0x0102,12},{0x0105,12},{0x0104,12},{0x000C, 4},
{0x001E, 5},{0x0003, 5},{0x003E, 6},{0x003F, 6},
{0x0009, 4},{0x000E, 4},{0x000B, 7},{0x0009, 7}
},
{
{0x0002, 3},{0x000E, 4},{0x001E, 5},{0x000C, 4},
{0x001F, 5},{0x006E, 7},{0x00AD,10},{0x00AF,10},
{0x0014, 7},{0x0004, 3},{0x0003, 3},{0x001A, 5},
{0x0017, 5},{0x002A, 8},{0x0576,13},{0x0AEF,14},
{0x0AEE,14},{0x0571,13},{0x0570,13},{0x0573,13},
{0x0572,13},{0x0575,13},{0x0574,13},{0x0003, 4},
{0x0016, 5},{0x0004, 5},{0x0036, 6},{0x000B, 6},
{0x000A, 4},{0x0000, 3},{0x006F, 7},{0x00AC,10}
},
{
{0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
{0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
{0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
{0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
{0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
{0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
{0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
{0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
},
{
{0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
{0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
{0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
{0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
{0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
{0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
{0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
{0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
},
{
{0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
{0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
{0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
{0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
{0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
{0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
{0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
{0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
},
{
{0x0003, 3},{0x0011, 5},{0x0020, 6},{0x0074, 7},
{0x010D, 9},{0x0863,12},{0x0860,12},{0x000A, 5},
{0x0075, 7},{0x0001, 3},{0x0000, 3},{0x000B, 4},
{0x000A, 4},{0x0018, 5},{0x0038, 6},{0x0042, 7},
{0x010F, 9},{0x010E, 9},{0x0219,10},{0x10C3,13},
{0x10C2,13},{0x10C5,13},{0x10C4,13},{0x000F, 4},
{0x0004, 4},{0x0019, 5},{0x000B, 5},{0x0039, 6},
{0x0009, 4},{0x001B, 5},{0x001A, 5},{0x003B, 6}
},
{
{0x0005, 3},{0x0001, 4},{0x003E, 6},{0x0001, 5},
{0x00E2, 8},{0x1C6F,13},{0x38D9,14},{0x0039, 6},
{0x001F, 6},{0x0002, 3},{0x0001, 3},{0x0009, 4},
{0x0008, 4},{0x0000, 5},{0x0070, 7},{0x01C7, 9},
{0x038C,10},{0x071A,11},{0x38D8,14},{0x38DB,14},
{0x38DA,14},{0x38DD,14},{0x38DC,14},{0x000D, 4},
{0x001D, 5},{0x000E, 5},{0x003F, 6},{0x003C, 6},
{0x000C, 4},{0x0006, 4},{0x003D, 6},{0x001E, 6}
},
{
{0x0006, 3},{0x000B, 4},{0x0011, 5},{0x001E, 5},
{0x0074, 7},{0x03AA,10},{0x1D5C,13},{0x0001, 6},
{0x0021, 6},{0x0001, 3},{0x0002, 3},{0x0007, 4},
{0x0006, 4},{0x003E, 6},{0x00EB, 8},{0x01D4, 9},
{0x0EAF,12},{0x3ABB,14},{0x3ABA,14},{0x1D59,13},
{0x1D58,13},{0x1D5B,13},{0x1D5A,13},{0x000A, 4},
{0x001C, 5},{0x0001, 5},{0x003F, 6},{0x003B, 6},
{0x0001, 4},{0x0009, 4},{0x0020, 6},{0x0000, 6}
},
{
{0x0004, 3},{0x000A, 4},{0x0017, 5},{0x0004, 4},
{0x0016, 6},{0x016A, 9},{0x16B1,13},{0x0017, 7},
{0x005B, 7},{0x0006, 3},{0x0007, 3},{0x0001, 4},
{0x0000, 4},{0x000A, 6},{0x02D7,10},{0x0B5A,12},
{0x16B0,13},{0x16B3,13},{0x16B2,13},{0x2D6D,14},
{0x2D6C,14},{0x2D6F,14},{0x2D6E,14},{0x0006, 4},
{0x000A, 5},{0x0004, 5},{0x002C, 6},{0x0017, 6},
{0x0003, 4},{0x0007, 4},{0x0016, 7},{0x00B4, 8}
},
{
{0x0005, 3},{0x000D, 4},{0x0005, 4},{0x0009, 4},
{0x0033, 6},{0x0193, 9},{0x192C,13},{0x0061, 8},
{0x0031, 7},{0x0000, 2},{0x0007, 3},{0x0010, 5},
{0x0011, 5},{0x00C8, 8},{0x192F,13},{0x325B,14},
{0x325A,14},{0x1929,13},{0x1928,13},{0x192B,13},
{0x192A,13},{0x325D,14},{0x325C,14},{0x0018, 5},
{0x001A, 6},{0x001B, 6},{0x0065, 7},{0x0019, 6},
{0x0004, 4},{0x0007, 4},{0x0060, 8},{0x0324,10}
},
{
{0x0006, 3},{0x0000, 3},{0x0002, 4},{0x000F, 4},
{0x0039, 6},{0x01D9, 9},{0x1D82,13},{0x0761,11},
{0x03BE,10},{0x0001, 2},{0x0002, 2},{0x000F, 6},
{0x000E, 6},{0x0762,11},{0x3B07,14},{0x3B06,14},
{0x3B1D,14},{0x3B1C,14},{0x3B1F,14},{0x3B1E,14},
{0x3B19,14},{0x3B18,14},{0x3B1B,14},{0x0038, 6},
{0x01DE, 9},{0x00ED, 8},{0x03BF,10},{0x00EE, 8},
{0x003A, 6},{0x0006, 5},{0x0EC0,12},{0x3B1A,14}
},
{
{0x0000, 2},{0x0002, 3},{0x000F, 5},{0x0006, 4},
{0x001C, 6},{0x01D0,10},{0x0E8C,13},{0x1D1B,14},
{0x1D1A,14},{0x0003, 2},{0x0002, 2},{0x00EA, 9},
{0x00E9, 9},{0x0E89,13},{0x0E88,13},{0x0E8B,13},
{0x0E8A,13},{0x1D65,14},{0x1D64,14},{0x1D67,14},
{0x1D66,14},{0x1D61,14},{0x1D60,14},{0x03AD,11},
{0x1D63,14},{0x1D62,14},{0x1D1D,14},{0x1D1C,14},
{0x003B, 7},{0x01D7,10},{0x1D1F,14},{0x1D1E,14}
},
{
{0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4},
{0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14},
{0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7},
{0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14},
{0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14},
{0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9},
{0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8},
{0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14}
}
};
/*A description of a Huffman code value used when encoding the tree.
oc_huff_codes_pack() fills one of these per token, sorts the array by
pattern with huff_entry_cmp(), and then walks it to emit the tree.*/
typedef struct{
/*The bit pattern, left-shifted so that the MSB of all patterns is
aligned.*/
ogg_uint32_t pattern;
/*The amount the bit pattern was shifted.
oc_huff_codes_pack() sets this to the table's maximum code length minus
this code's own length.*/
int shift;
/*The token this bit pattern represents.*/
int token;
}oc_huff_entry;
/*Compares two oc_huff_entry structures by their bit patterns.
_c1: The first entry to compare.
_c2: The second entry to compare.
Return: <0 if _c1<_c2, >0 if _c1>_c2.*/
static int huff_entry_cmp(const void *_c1,const void *_c2){
ogg_uint32_t b1;
ogg_uint32_t b2;
b1=((const oc_huff_entry *)_c1)->pattern;
b2=((const oc_huff_entry *)_c2)->pattern;
return b1<b2?-1:b1>b2?1:0;
}
/*Encodes a description of the given Huffman tables.
  Although the codes are stored in the encoder as flat arrays, in the bit
   stream and in the decoder they are structured as a tree.
  This function recovers the tree structure from the flat array and then
   writes it out.
  Note that the codes MUST form a Huffman code, and not merely a prefix-free
   code, since the binary tree is assumed to be full.
  Codes with a length of zero are not written at all.
  _opb:   The buffer to store the tree in.
  _codes: The Huffman tables to pack.
  Return: 0 on success, or TH_EINVAL if one of the given Huffman tables does
           not form a full, prefix-free code, or contains a code length that
           is negative or larger than 32 bits.
          When an error is returned, _opb may already contain the bits that
           were written before the problem was detected.*/
int oc_huff_codes_pack(oggpack_buffer *_opb,
 const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
  int i;
  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
    oc_huff_entry entries[TH_NDCT_TOKENS];
    /*All mask arithmetic is done in unsigned long (at least 32 bits wide) so
       that shifts into or past bit 31 are well defined.*/
    unsigned long mask;
    int           bpos;
    int           maxlen;
    int           j;
    /*First, find the maximum code length so we can align all the bit
       patterns.*/
    maxlen=_codes[i][0].nbits;
    for(j=1;j<TH_NDCT_TOKENS;j++){
      maxlen=OC_MAXI(_codes[i][j].nbits,maxlen);
    }
    /*The patterns are only 32 bits wide, so longer (or negative) lengths can
       never describe a valid code; reject them before they are used as shift
       counts.*/
    if(maxlen<0||maxlen>32)return TH_EINVAL;
    /*Equivalent to (1<<maxlen)-1, split in two so that no single shift count
       reaches 32.*/
    mask=(1UL<<(maxlen>>1)<<((maxlen+1)>>1))-1;
    /*Copy over the codes into our temporary workspace.
      The bit patterns are aligned, and the original entry each code is from
       is stored as well.*/
    for(j=0;j<TH_NDCT_TOKENS;j++){
      int nbits;
      nbits=_codes[i][j].nbits;
      if(nbits<0)return TH_EINVAL;
      entries[j].shift=maxlen-nbits;
      /*A zero-length code would be shifted entirely out of the masked range,
         so its aligned pattern is always 0; computing it directly avoids a
         shift by maxlen, which may be 32.*/
      entries[j].pattern=nbits>0?
       ((_codes[i][j].pattern<<entries[j].shift)&mask):0;
      entries[j].token=j;
    }
    /*Sort the codes into ascending order.
      This is the order the leaves of the tree will be traversed.*/
    qsort(entries,TH_NDCT_TOKENS,sizeof(entries[0]),huff_entry_cmp);
    /*For each leaf of the tree:*/
    bpos=maxlen;
    for(j=0;j<TH_NDCT_TOKENS;j++){
      unsigned long bit;
      /*If this code has any bits at all.*/
      if(entries[j].shift<maxlen){
        /*Descend into the tree, writing a bit for each branch.*/
        for(;bpos>entries[j].shift;bpos--)oggpackB_write(_opb,0,1);
        /*Mark this as a leaf node, and write its value.*/
        oggpackB_write(_opb,1,1);
        oggpackB_write(_opb,entries[j].token,5);
        /*For each 1 branch we've descended, back up the tree until we reach a
           0 branch.
          bpos is at most 31 here because shift<maxlen<=32.*/
        bit=1UL<<bpos;
        for(;entries[j].pattern&bit;bpos++)bit<<=1;
        /*Validate the code.*/
        if(j+1<TH_NDCT_TOKENS){
          /*Select every bit strictly above the branch bit.*/
          mask=~(bit-1)<<1;
          /*The next entry should have a 1 bit where we had a 0, and should
             match our code above that bit.
            This verifies both fullness and prefix-freeness simultaneously.*/
          if(!(entries[j+1].pattern&bit)||
           (entries[j].pattern&mask)!=(entries[j+1].pattern&mask)){
            return TH_EINVAL;
          }
        }
        /*If there are no more codes, we should have ascended back to the top
           of the tree.*/
        else if(bpos<maxlen)return TH_EINVAL;
      }
    }
  }
  return 0;
}

View File

@ -0,0 +1,19 @@
/*Encoder-side Huffman code declarations.*/
#if !defined(_huffenc_H)
# define _huffenc_H (1)
# include "huffman.h"
/*A single Huffman table: one th_huff_code per DCT token.*/
typedef th_huff_code th_huff_table[TH_NDCT_TOKENS];
/*A complete set of TH_NHUFFMAN_TABLES Huffman tables, defined elsewhere.
NOTE(review): presumably the stock VP3.1 code set, going by the name.*/
extern const th_huff_code
TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
/*Writes a tree description of each table in _codes to _opb.
Return: 0 on success, or a negative value if one of the tables does not form
a full, prefix-free code.*/
int oc_huff_codes_pack(oggpack_buffer *_opb,
const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]);
#endif

View File

@ -1,309 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffman.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include "encoder_internal.h"
#include "hufftables.h"
/* Builds a doubly linked list holding one leaf entry per token, kept in
ascending order of frequency, and stores its head in HuffRoot[HIndex].
HuffRoot: array of list/tree heads; only slot HIndex is written.
HIndex: index of the slot to build.
FreqList: frequency of each token; MAX_ENTROPY_TOKENS entries are read and
zero frequencies are treated as 1.
The results of _ogg_calloc() are used without being checked. */
static void CreateHuffmanList(HUFF_ENTRY ** HuffRoot,
ogg_uint32_t HIndex, ogg_uint32_t *FreqList ) {
int i;
HUFF_ENTRY *entry_ptr;
HUFF_ENTRY *search_ptr;
/* Create a HUFF entry for token zero. */
HuffRoot[HIndex] = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*HuffRoot[HIndex]));
HuffRoot[HIndex]->Previous = NULL;
HuffRoot[HIndex]->Next = NULL;
HuffRoot[HIndex]->ZeroChild = NULL;
HuffRoot[HIndex]->OneChild = NULL;
HuffRoot[HIndex]->Value = 0;
HuffRoot[HIndex]->Frequency = FreqList[0];
if ( HuffRoot[HIndex]->Frequency == 0 )
HuffRoot[HIndex]->Frequency = 1;
/* Now add entries for all the other possible tokens. */
for ( i = 1; i < MAX_ENTROPY_TOKENS; i++ ) {
entry_ptr = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*entry_ptr));
entry_ptr->Value = i;
entry_ptr->Frequency = FreqList[i];
entry_ptr->ZeroChild = NULL;
entry_ptr->OneChild = NULL;
/* Force min value of 1. This prevents the tree getting too deep. */
if ( entry_ptr->Frequency == 0 )
entry_ptr->Frequency = 1;
/* Not larger than the current head: the new entry becomes the head. */
if ( entry_ptr->Frequency <= HuffRoot[HIndex]->Frequency ){
entry_ptr->Next = HuffRoot[HIndex];
HuffRoot[HIndex]->Previous = entry_ptr;
entry_ptr->Previous = NULL;
HuffRoot[HIndex] = entry_ptr;
}else{
/* Walk forward to the first entry that is not smaller than the new one,
or to the tail if there is none. */
search_ptr = HuffRoot[HIndex];
while ( (search_ptr->Next != NULL) &&
(search_ptr->Frequency < entry_ptr->Frequency) ){
search_ptr = (HUFF_ENTRY *)search_ptr->Next;
}
if ( search_ptr->Frequency < entry_ptr->Frequency ){
/* Every existing entry is smaller: append at the tail. */
entry_ptr->Next = NULL;
entry_ptr->Previous = search_ptr;
search_ptr->Next = entry_ptr;
}else{
/* Insert before search_ptr. The head is strictly smaller than the new
entry in this branch, so search_ptr is not the head and its Previous
link is set. */
entry_ptr->Next = search_ptr;
entry_ptr->Previous = search_ptr->Previous;
search_ptr->Previous->Next = entry_ptr;
search_ptr->Previous = entry_ptr;
}
}
}
}
/* Walks the Huffman tree rooted at HuffRoot and records the code of every
   leaf.
   HuffRoot:            node to visit.
   HuffCodeArray:       receives the code bits, indexed by leaf Value.
   HuffCodeLengthArray: receives the code length, indexed by leaf Value.
   CodeValue:           code bits accumulated on the path to this node.
   CodeLength:          number of bits accumulated on the path to this node. */
static void CreateCodeArray( HUFF_ENTRY * HuffRoot,
                             ogg_uint32_t *HuffCodeArray,
                             unsigned char *HuffCodeLengthArray,
                             ogg_uint32_t CodeValue,
                             unsigned char CodeLength ) {
  ogg_uint32_t  child_code;
  unsigned char child_length;

  /* A node without children is a leaf: store its code and stop. */
  if ( HuffRoot->ZeroChild == NULL && HuffRoot->OneChild == NULL ){
    HuffCodeArray[HuffRoot->Value] = CodeValue;
    HuffCodeLengthArray[HuffRoot->Value] = CodeLength;
    return;
  }

  /* Interior node: each child's code is one bit longer, with a 0 appended
     for the zero branch and a 1 appended for the one branch. */
  child_length = CodeLength;
  child_length++;
  child_code = CodeValue << 1;
  CreateCodeArray(HuffRoot->ZeroChild, HuffCodeArray, HuffCodeLengthArray,
                  child_code + 0, child_length);
  CreateCodeArray(HuffRoot->OneChild, HuffCodeArray, HuffCodeLengthArray,
                  child_code + 1, child_length);
}
/* Builds the Huffman tree for table HIndex from FreqList and fills in the
matching code and code-length arrays.
HuffRoot: array of tree roots; slot HIndex receives the finished tree.
HuffCodeArray: receives the code bits for each token.
HuffCodeLengthArray: receives the code length for each token.
HIndex: index of the table being built.
FreqList: token frequencies, passed on to CreateHuffmanList().
The results of _ogg_calloc() are used without being checked. */
static void BuildHuffmanTree( HUFF_ENTRY **HuffRoot,
ogg_uint32_t *HuffCodeArray,
unsigned char *HuffCodeLengthArray,
ogg_uint32_t HIndex,
ogg_uint32_t *FreqList ){
HUFF_ENTRY *entry_ptr;
HUFF_ENTRY *search_ptr;
/* First create a sorted linked list representing the frequencies of
each token. */
CreateHuffmanList( HuffRoot, HIndex, FreqList );
/* Now build the tree from the list. */
/* While there are at least two items left in the list. */
while ( HuffRoot[HIndex]->Next != NULL ){
/* Create the new node as the parent of the first two in the list. */
entry_ptr = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*entry_ptr));
/* A Value of -1 marks an interior (non-leaf) node. */
entry_ptr->Value = -1;
entry_ptr->Frequency = HuffRoot[HIndex]->Frequency +
HuffRoot[HIndex]->Next->Frequency ;
entry_ptr->ZeroChild = HuffRoot[HIndex];
entry_ptr->OneChild = HuffRoot[HIndex]->Next;
/* If there are still more items in the list then insert the new
node into the list. */
if (entry_ptr->OneChild->Next != NULL ){
/* Set up the provisional 'new root' */
HuffRoot[HIndex] = entry_ptr->OneChild->Next;
HuffRoot[HIndex]->Previous = NULL;
/* Now scan through the remaining list to insert the new entry
at the appropriate point. */
if ( entry_ptr->Frequency <= HuffRoot[HIndex]->Frequency ){
/* Not larger than the head: the new node becomes the head. */
entry_ptr->Next = HuffRoot[HIndex];
HuffRoot[HIndex]->Previous = entry_ptr;
entry_ptr->Previous = NULL;
HuffRoot[HIndex] = entry_ptr;
}else{
/* Walk forward to the first entry that is not smaller than the new
node, or to the tail if there is none. */
search_ptr = HuffRoot[HIndex];
while ( (search_ptr->Next != NULL) &&
(search_ptr->Frequency < entry_ptr->Frequency) ){
search_ptr = search_ptr->Next;
}
if ( search_ptr->Frequency < entry_ptr->Frequency ){
/* Every remaining entry is smaller: append at the tail. */
entry_ptr->Next = NULL;
entry_ptr->Previous = search_ptr;
search_ptr->Next = entry_ptr;
}else{
/* Insert immediately before search_ptr. */
entry_ptr->Next = search_ptr;
entry_ptr->Previous = search_ptr->Previous;
search_ptr->Previous->Next = entry_ptr;
search_ptr->Previous = entry_ptr;
}
}
}else{
/* Build has finished. */
entry_ptr->Next = NULL;
entry_ptr->Previous = NULL;
HuffRoot[HIndex] = entry_ptr;
}
/* Delete the Next/Previous properties of the children (probably not
necessary). */
entry_ptr->ZeroChild->Next = NULL;
entry_ptr->ZeroChild->Previous = NULL;
entry_ptr->OneChild->Next = NULL;
entry_ptr->OneChild->Previous = NULL;
}
/* Now build a code array from the tree. */
CreateCodeArray( HuffRoot[HIndex], HuffCodeArray,
HuffCodeLengthArray, 0, 0);
}
/* Recursively frees the tree rooted at root_ptr: both subtrees first, then
   the node itself.  A NULL root_ptr is ignored. */
static void DestroyHuffTree(HUFF_ENTRY *root_ptr){
  if (!root_ptr) return;
  /* The recursive calls perform their own NULL check, so absent children
     need no special handling here. */
  DestroyHuffTree(root_ptr->ZeroChild);
  DestroyHuffTree(root_ptr->OneChild);
  _ogg_free(root_ptr);
}
/* Releases the Huffman trees, code arrays and code-length arrays held by
   pbi.
   Every freed slot is reset to NULL so that calling this function again
   (InitHuffmanSet() always calls it before allocating) cannot free the same
   memory twice. */
void ClearHuffmanSet( PB_INSTANCE *pbi ){
  int i;

  ClearHuffmanTrees(pbi->HuffRoot_VP3x);

  for ( i = 0; i < NUM_HUFF_TABLES; i++ ){
    if (pbi->HuffCodeArray_VP3x[i]){
      _ogg_free (pbi->HuffCodeArray_VP3x[i]);
      pbi->HuffCodeArray_VP3x[i] = NULL;
    }
  }

  for ( i = 0; i < NUM_HUFF_TABLES; i++ ){
    if (pbi->HuffCodeLengthArray_VP3x[i]){
      _ogg_free (pbi->HuffCodeLengthArray_VP3x[i]);
      pbi->HuffCodeLengthArray_VP3x[i] = NULL;
    }
  }
}
/* Discards any Huffman data already held by pbi, then allocates fresh code
   and code-length arrays for each of the NUM_HUFF_TABLES tables and builds
   each table's tree from FrequencyCounts_VP3.
   The results of _ogg_calloc() are used without being checked. */
void InitHuffmanSet( PB_INSTANCE *pbi ){
  int table;

  ClearHuffmanSet(pbi);

  pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31;

  table = 0;
  while ( table < NUM_HUFF_TABLES ){
    /* One zero-initialised slot per token in each array. */
    pbi->HuffCodeArray_VP3x[table] =
      _ogg_calloc(MAX_ENTROPY_TOKENS,
                  sizeof(*pbi->HuffCodeArray_VP3x[table]));
    pbi->HuffCodeLengthArray_VP3x[table] =
      _ogg_calloc(MAX_ENTROPY_TOKENS,
                  sizeof(*pbi->HuffCodeLengthArray_VP3x[table]));
    BuildHuffmanTree( pbi->HuffRoot_VP3x,
                      pbi->HuffCodeArray_VP3x[table],
                      pbi->HuffCodeLengthArray_VP3x[table],
                      table, FrequencyCounts_VP3[table]);
    table++;
  }
}
/* Reads one Huffman (sub)tree from opb into HuffRoot.
   A 1 bit introduces a leaf followed by a 5-bit token value; a 0 bit
   introduces an interior node followed by its zero and one subtrees.
   HuffRoot: node to fill in; its children are allocated here.
   depth:    number of interior nodes above this one.
   opb:      bit reader positioned at the start of the (sub)tree.
   Return: 0 on success, OC_BADHEADER if a read yields a negative value or
           the tree is nested more than 32 levels deep.
   The results of _ogg_calloc() are used without being checked. */
static int ReadHuffTree(HUFF_ENTRY * HuffRoot, int depth,
                        oggpack_buffer *opb) {
  long bit;
  long token;
  int  status;

  theora_read(opb,1,&bit);
  if (bit < 0) return OC_BADHEADER;

  if (bit) {
    /* Leaf node. */
    HuffRoot->ZeroChild = NULL;
    HuffRoot->OneChild = NULL;
    theora_read(opb,5,&token);
    HuffRoot->Value=token;
    if (HuffRoot->Value < 0) return OC_BADHEADER;
    return 0;
  }

  /* Interior node: read the zero branch, then the one branch. */
  if (++depth > 32) return OC_BADHEADER;
  HuffRoot->ZeroChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
  status = ReadHuffTree(HuffRoot->ZeroChild, depth, opb);
  if (status < 0) return status;
  HuffRoot->OneChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
  status = ReadHuffTree(HuffRoot->OneChild, depth, opb);
  if (status < 0) return status;
  HuffRoot->Value = -1;
  return 0;
}
/* Reads all NUM_HUFF_TABLES Huffman trees from opb into ci->HuffRoot.
   Return: 0 on success, or the first non-zero result from ReadHuffTree().
           Trees allocated before a failure are left in ci->HuffRoot. */
int ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb) {
  int table;
  int status;

  for (table = 0; table < NUM_HUFF_TABLES; table++) {
    ci->HuffRoot[table] = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
    status = ReadHuffTree(ci->HuffRoot[table], 0, opb);
    if (status != 0) return status;
  }
  return 0;
}
/* Writes the (sub)tree rooted at HuffRoot to opb: a 0 bit followed by both
   subtrees for an interior node (negative Value), or a 1 bit followed by
   the 5-bit token value for a leaf. */
static void WriteHuffTree(HUFF_ENTRY *HuffRoot, oggpack_buffer *opb) {
  if (HuffRoot->Value < 0) {
    /* Interior node: zero branch first, then the one branch. */
    oggpackB_write(opb, 0, 1);
    WriteHuffTree(HuffRoot->ZeroChild, opb);
    WriteHuffTree(HuffRoot->OneChild, opb);
    return;
  }
  /* Leaf node. */
  oggpackB_write(opb, 1, 1);
  oggpackB_write(opb, HuffRoot->Value, 5);
}
/* Writes each of the NUM_HUFF_TABLES trees in HuffRoot to opb, in index
   order. */
void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
                       oggpack_buffer *opb) {
  int table;

  table = 0;
  while (table < NUM_HUFF_TABLES) {
    WriteHuffTree(HuffRoot[table], opb);
    table++;
  }
}
/* Returns a newly allocated deep copy of the tree rooted at HuffSrc, or
   NULL if HuffSrc is NULL.
   Only Value and, for interior nodes (negative Value), the two children are
   copied; every other field keeps the zero value supplied by _ogg_calloc().
   The result of _ogg_calloc() is used without being checked. */
static HUFF_ENTRY *CopyHuffTree(const HUFF_ENTRY *HuffSrc) {
  HUFF_ENTRY *copy;

  if (!HuffSrc) return NULL;

  copy = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
  copy->Value = HuffSrc->Value;
  if (HuffSrc->Value < 0) {
    copy->ZeroChild = CopyHuffTree(HuffSrc->ZeroChild);
    copy->OneChild = CopyHuffTree(HuffSrc->OneChild);
  }
  return copy;
}
/* Points pbi at the ExtraBitLengths_VP31 table and gives it its own deep
   copy of each of the NUM_HUFF_TABLES trees stored in ci. */
void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci) {
  int table;

  pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31;

  table = 0;
  while (table < NUM_HUFF_TABLES) {
    pbi->HuffRoot_VP3x[table] = CopyHuffTree(ci->HuffRoot[table]);
    table++;
  }
}
/* Frees each of the NUM_HUFF_TABLES trees in HuffRoot and resets its slot to
   NULL, so the array can safely be cleared again or reused. */
void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]){
  int table;

  table = 0;
  while (table < NUM_HUFF_TABLES) {
    DestroyHuffTree(HuffRoot[table]);
    HuffRoot[table] = NULL;
    table++;
  }
}

View File

@ -5,70 +5,66 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffman.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
last mod: $Id$
********************************************************************/
/********************************************************************
* Constants
********************************************************************/
#define NUM_HUFF_TABLES 80
#define DC_HUFF_OFFSET 0
#define AC_HUFF_OFFSET 16
#define AC_TABLE_2_THRESH 5
#define AC_TABLE_3_THRESH 14
#define AC_TABLE_4_THRESH 27
/*Include guard: the macro defined here must be the same one that is tested,
otherwise a second inclusion of this header is not suppressed.*/
#if !defined(_huffman_H)
# define _huffman_H (1)
# include "theora/codec.h"
# include "ocintrin.h"
#define DC_HUFF_CHOICES 16
#define DC_HUFF_CHOICE_BITS 4
/*The range of valid quantized DCT coefficient values.
VP3 used 511 in the encoder, but the bitstream is capable of 580.*/
#define OC_DCT_VAL_RANGE (580)
#define AC_HUFF_CHOICES 16
#define AC_HUFF_CHOICE_BITS 4
#define OC_NDCT_TOKEN_BITS (5)
/* Constants associated with entropy tokenisation. */
#define MAX_SINGLE_TOKEN_VALUE 6
#define DCT_VAL_CAT2_MIN 3
#define DCT_VAL_CAT3_MIN 7
#define DCT_VAL_CAT4_MIN 9
#define DCT_VAL_CAT5_MIN 13
#define DCT_VAL_CAT6_MIN 21
#define DCT_VAL_CAT7_MIN 37
#define DCT_VAL_CAT8_MIN 69
#define OC_DCT_EOB1_TOKEN (0)
#define OC_DCT_EOB2_TOKEN (1)
#define OC_DCT_EOB3_TOKEN (2)
#define OC_DCT_REPEAT_RUN0_TOKEN (3)
#define OC_DCT_REPEAT_RUN1_TOKEN (4)
#define OC_DCT_REPEAT_RUN2_TOKEN (5)
#define OC_DCT_REPEAT_RUN3_TOKEN (6)
#define DCT_EOB_TOKEN 0
#define DCT_EOB_PAIR_TOKEN 1
#define DCT_EOB_TRIPLE_TOKEN 2
#define DCT_REPEAT_RUN_TOKEN 3
#define DCT_REPEAT_RUN2_TOKEN 4
#define DCT_REPEAT_RUN3_TOKEN 5
#define DCT_REPEAT_RUN4_TOKEN 6
#define OC_DCT_SHORT_ZRL_TOKEN (7)
#define OC_DCT_ZRL_TOKEN (8)
#define DCT_SHORT_ZRL_TOKEN 7
#define DCT_ZRL_TOKEN 8
#define OC_ONE_TOKEN (9)
#define OC_MINUS_ONE_TOKEN (10)
#define OC_TWO_TOKEN (11)
#define OC_MINUS_TWO_TOKEN (12)
#define ONE_TOKEN 9 /* Special tokens for -1,1,-2,2 */
#define MINUS_ONE_TOKEN 10
#define TWO_TOKEN 11
#define MINUS_TWO_TOKEN 12
#define OC_DCT_VAL_CAT2 (13)
#define OC_DCT_VAL_CAT3 (17)
#define OC_DCT_VAL_CAT4 (18)
#define OC_DCT_VAL_CAT5 (19)
#define OC_DCT_VAL_CAT6 (20)
#define OC_DCT_VAL_CAT7 (21)
#define OC_DCT_VAL_CAT8 (22)
#define LOW_VAL_TOKENS (MINUS_TWO_TOKEN + 1)
#define DCT_VAL_CATEGORY3 (LOW_VAL_TOKENS + 4)
#define DCT_VAL_CATEGORY4 (DCT_VAL_CATEGORY3 + 1)
#define DCT_VAL_CATEGORY5 (DCT_VAL_CATEGORY4 + 1)
#define DCT_VAL_CATEGORY6 (DCT_VAL_CATEGORY5 + 1)
#define DCT_VAL_CATEGORY7 (DCT_VAL_CATEGORY6 + 1)
#define DCT_VAL_CATEGORY8 (DCT_VAL_CATEGORY7 + 1)
#define OC_DCT_RUN_CAT1A (23)
#define OC_DCT_RUN_CAT1B (28)
#define OC_DCT_RUN_CAT1C (29)
#define OC_DCT_RUN_CAT2A (30)
#define OC_DCT_RUN_CAT2B (31)
#define DCT_RUN_CATEGORY1 (DCT_VAL_CATEGORY8 + 1)
#define DCT_RUN_CATEGORY1B (DCT_RUN_CATEGORY1 + 5)
#define DCT_RUN_CATEGORY1C (DCT_RUN_CATEGORY1B + 1)
#define DCT_RUN_CATEGORY2 (DCT_RUN_CATEGORY1C + 1)
#define OC_NDCT_EOB_TOKEN_MAX (7)
#define OC_NDCT_ZRL_TOKEN_MAX (9)
#define OC_NDCT_VAL_MAX (23)
#define OC_NDCT_VAL_CAT1_MAX (13)
#define OC_NDCT_VAL_CAT2_MAX (17)
#define OC_NDCT_VAL_CAT2_SIZE (OC_NDCT_VAL_CAT2_MAX-OC_DCT_VAL_CAT2)
#define OC_NDCT_RUN_MAX (32)
#define OC_NDCT_RUN_CAT1A_MAX (28)
/* 35 */
#define MAX_ENTROPY_TOKENS (DCT_RUN_CATEGORY2 + 2)
extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
#endif

File diff suppressed because it is too large Load Diff

View File

@ -5,468 +5,331 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: idct.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
last mod: $Id$
********************************************************************/
#include <string.h>
#include "encoder_internal.h"
#include "quant_lookup.h"
#include "internal.h"
#include "dct.h"
/*Bias added to the column-pass sums right before they are shifted right by 4,
   so that the shift rounds instead of truncating.*/
#define IdctAdjustBeforeShift 8
/*16.16 fixed-point multipliers: every product formed with them in this file
   is followed by a right shift of 16.
  NOTE(review): the values equal round(65536*cos(k*pi/16)) for k=1...7 (for
   instance xC4S4=46341 for cos(pi/4)); confirm that is the intended reading
   of the xCkSj names.*/
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785
/*Dequantizes all 64 coefficients of a block.
  Entry i of quantized_list is multiplied by entry i of dequant_coeffs and the
   product is stored in DCT_block at position dequant_index[i] (a table that
   is declared outside this part of the file).*/
static void dequant_slow( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block) {
int i;
for(i=0;i<64;i++)
DCT_block[dequant_index[i]] = quantized_list[i] * dequant_coeffs[i];
/*One-dimensional inverse 8-point Type-II DCT.
  The results are twice as large as the orthonormal transform would give.
  _y: Destination.
      The eight results are written with a stride of 8 elements (_y[0],
       _y[8], ..., _y[56]), i.e. down one column of an 8x8 block.
  _x: Source coefficients.
      The eight consecutive entries _x[0]..._x[7] are read (e.g. one row of an
       8x8 block).*/
static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  /*a*: stage 1 results, b*: stage 2 results, c*: stage 3 results.*/
  ogg_int32_t a0,a1,a2,a3,a4,a5,a6,a7;
  ogg_int32_t b4,b5,b6,b7;
  ogg_int32_t c0,c1,c2,c3,c5,c6;
  /*Stage 1: the 0-1 butterfly and the three rotations.*/
  a0=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
  a1=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
  /*Rotation by 6pi/16 (inputs 2 and 6).*/
  a2=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
  a3=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
  /*Rotation by 7pi/16 (inputs 1 and 7).*/
  a4=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
  a7=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
  /*Rotation by 3pi/16 (inputs 5 and 3).*/
  a5=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
  a6=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
  /*Stage 2: the 4-5 and 7-6 butterflies.
    The differences are truncated to 16 bits before being scaled.*/
  b4=a4+a5;
  b5=OC_C4S4*(ogg_int16_t)(a4-a5)>>16;
  b7=a7+a6;
  b6=OC_C4S4*(ogg_int16_t)(a7-a6)>>16;
  /*Stage 3: the 0-3, 1-2 and 6-5 butterflies.*/
  c0=a0+a3;
  c3=a0-a3;
  c1=a1+a2;
  c2=a1-a2;
  c6=b6+b5;
  c5=b6-b5;
  /*Stage 4: the final butterflies, written out with a stride of 8.*/
  _y[0*8]=(ogg_int16_t)(c0+b7);
  _y[1*8]=(ogg_int16_t)(c1+c6);
  _y[2*8]=(ogg_int16_t)(c2+c5);
  _y[3*8]=(ogg_int16_t)(c3+b4);
  _y[4*8]=(ogg_int16_t)(c3-b4);
  _y[5*8]=(ogg_int16_t)(c2-c5);
  _y[6*8]=(ogg_int16_t)(c1-c6);
  _y[7*8]=(ogg_int16_t)(c0-b7);
}
/*Full 8x8 inverse DCT of one block.
  InputData:   The quantized coefficients; they are dequantized with
                QuantMatrix by dequant_slow() into a local 32-bit buffer.
  QuantMatrix: The dequantization factors handed to dequant_slow().
  OutputData:  Receives the 64 results.
  The transform runs as a row pass over the local buffer followed by a column
   pass that writes OutputData.*/
void IDctSlow( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ) {
ogg_int32_t IntermediateData[64];
ogg_int32_t * ip = IntermediateData;
ogg_int16_t * op = OutputData;
ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
ogg_int32_t t1, t2;
int loop;
dequant_slow( QuantMatrix, InputData, IntermediateData);
/* Inverse DCT on the rows now */
for ( loop = 0; loop < 8; loop++){
/* Check for non-zero values */
/* Rows whose eight entries are all zero are skipped and left unchanged. */
if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
t1 = (xC1S7 * ip[1]);
t2 = (xC7S1 * ip[7]);
t1 >>= 16;
t2 >>= 16;
_A = t1 + t2;
t1 = (xC7S1 * ip[1]);
t2 = (xC1S7 * ip[7]);
t1 >>= 16;
t2 >>= 16;
_B = t1 - t2;
t1 = (xC3S5 * ip[3]);
t2 = (xC5S3 * ip[5]);
t1 >>= 16;
t2 >>= 16;
_C = t1 + t2;
t1 = (xC3S5 * ip[5]);
t2 = (xC5S3 * ip[3]);
t1 >>= 16;
t2 >>= 16;
_D = t1 - t2;
t1 = (xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (xC4S4 * (ip[0] + ip[4]));
t1 >>= 16;
_E = t1;
t1 = (xC4S4 * (ip[0] - ip[4]));
t1 >>= 16;
_F = t1;
t1 = (xC2S6 * ip[2]);
t2 = (xC6S2 * ip[6]);
t1 >>= 16;
t2 >>= 16;
_G = t1 + t2;
t1 = (xC6S2 * ip[2]);
t2 = (xC2S6 * ip[6]);
t1 >>= 16;
t2 >>= 16;
_H = t1 - t2;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
/* Final sequence of operations over-write original inputs. */
/* The row results are truncated to 16 bits and stored back in place. */
ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
}
ip += 8; /* next row */
}
/* Column pass: same arithmetic, reading with a stride of 8 and writing the
   results to the output block. */
ip = IntermediateData;
for ( loop = 0; loop < 8; loop++){
/* Check for non-zero values (bitwise or faster than ||) */
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
t1 = (xC1S7 * ip[1*8]);
t2 = (xC7S1 * ip[7*8]);
t1 >>= 16;
t2 >>= 16;
_A = t1 + t2;
t1 = (xC7S1 * ip[1*8]);
t2 = (xC1S7 * ip[7*8]);
t1 >>= 16;
t2 >>= 16;
_B = t1 - t2;
t1 = (xC3S5 * ip[3*8]);
t2 = (xC5S3 * ip[5*8]);
t1 >>= 16;
t2 >>= 16;
_C = t1 + t2;
t1 = (xC3S5 * ip[5*8]);
t2 = (xC5S3 * ip[3*8]);
t1 >>= 16;
t2 >>= 16;
_D = t1 - t2;
t1 = (xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (xC4S4 * (ip[0*8] + ip[4*8]));
t1 >>= 16;
_E = t1;
t1 = (xC4S4 * (ip[0*8] - ip[4*8]));
t1 >>= 16;
_F = t1;
t1 = (xC2S6 * ip[2*8]);
t2 = (xC6S2 * ip[6*8]);
t1 >>= 16;
t2 >>= 16;
_G = t1 + t2;
t1 = (xC6S2 * ip[2*8]);
t2 = (xC2S6 * ip[6*8]);
t1 >>= 16;
t2 >>= 16;
_H = t1 - t2;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
/* Each of the eight outputs below contains exactly one of these four terms,
   so biasing them here rounds every >> 4 that follows. */
_Gd += IdctAdjustBeforeShift;
_Add += IdctAdjustBeforeShift;
_Ed += IdctAdjustBeforeShift;
_Fd += IdctAdjustBeforeShift;
/* Final sequence of operations over-write original inputs. */
op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
}else{
/* An all-zero column produces an all-zero output column. */
op[0*8] = 0;
op[7*8] = 0;
op[1*8] = 0;
op[2*8] = 0;
op[3*8] = 0;
op[4*8] = 0;
op[5*8] = 0;
op[6*8] = 0;
}
ip++; /* next column */
op++;
}
/*One-dimensional inverse 8-point Type-II DCT for inputs whose last four
   coefficients are zero.
  The results are twice as large as the orthonormal transform would give.
  _y: Destination.
      The eight results are written with a stride of 8 elements (e.g. down one
       column of an 8x8 block).
  _x: Source coefficients.
      Only _x[0]..._x[3] are read; the remaining four are taken to be 0.*/
static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  /*a*: stage 1 results, b*: stage 2 results, c*: stage 3 results.*/
  ogg_int32_t a0,a2,a3,a4,a5,a6,a7;
  ogg_int32_t b4,b5,b6,b7;
  ogg_int32_t c0,c1,c2,c3,c5,c6;
  /*Stage 1: with inputs 4...7 equal to zero every term is a single product.*/
  a0=OC_C4S4*_x[0]>>16;
  a2=OC_C6S2*_x[2]>>16;
  a3=OC_C2S6*_x[2]>>16;
  a4=OC_C7S1*_x[1]>>16;
  a7=OC_C1S7*_x[1]>>16;
  a5=-(OC_C5S3*_x[3]>>16);
  a6=OC_C3S5*_x[3]>>16;
  /*Stage 2: the differences are truncated to 16 bits before being scaled.*/
  b4=a4+a5;
  b5=OC_C4S4*(ogg_int16_t)(a4-a5)>>16;
  b7=a7+a6;
  b6=OC_C4S4*(ogg_int16_t)(a7-a6)>>16;
  /*Stage 3: a0 feeds both even butterflies.*/
  c1=a0+a2;
  c2=a0-a2;
  c0=a0+a3;
  c3=a0-a3;
  c6=b6+b5;
  c5=b6-b5;
  /*Stage 4: the final butterflies, written out with a stride of 8.*/
  _y[0*8]=(ogg_int16_t)(c0+b7);
  _y[1*8]=(ogg_int16_t)(c1+c6);
  _y[2*8]=(ogg_int16_t)(c2+c5);
  _y[3*8]=(ogg_int16_t)(c3+b4);
  _y[4*8]=(ogg_int16_t)(c3-b4);
  _y[5*8]=(ogg_int16_t)(c2-c5);
  _y[6*8]=(ogg_int16_t)(c1-c6);
  _y[7*8]=(ogg_int16_t)(c0-b7);
}
/************************
x x x x 0 0 0 0
x x x 0 0 0 0 0
x x 0 0 0 0 0 0
x 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
*************************/
/*Dequantizes only the first 10 entries of quantized_list (the positions
   marked x in the diagram above); entry i is multiplied by dequant_coeffs[i]
   and stored at DCT_block[dequant_index[i]].
  NOTE(review): the memset clears 128 bytes, not the whole 64-entry block;
   with a 4-byte ogg_int32_t that is the first 32 entries, i.e. rows 0-3.
  Confirm that callers never read the remaining entries.*/
static void dequant_slow10( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block){
int i;
memset(DCT_block,0, 128);
for(i=0;i<10;i++)
DCT_block[dequant_index[i]] = quantized_list[i] * dequant_coeffs[i];
/*One-dimensional inverse 8-point Type-II DCT for inputs whose last five
   coefficients are zero.
  The results are twice as large as the orthonormal transform would give.
  _y: Destination.
      The eight results are written with a stride of 8 elements (e.g. down one
       column of an 8x8 block).
  _x: Source coefficients.
      Only _x[0]..._x[2] are read; the remaining five are taken to be 0.*/
static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  /*a*: stage 1 results, b*: stage 2 results, c*: stage 3 results.*/
  ogg_int32_t a0,a2,a3,a4,a7;
  ogg_int32_t b5,b6;
  ogg_int32_t c0,c1,c2,c3,c5,c6;
  /*Stage 1: every term is a single product.*/
  a0=OC_C4S4*_x[0]>>16;
  a2=OC_C6S2*_x[2]>>16;
  a3=OC_C2S6*_x[2]>>16;
  a4=OC_C7S1*_x[1]>>16;
  a7=OC_C1S7*_x[1]>>16;
  /*Stage 2: a4 and a7 pass through unchanged and are also scaled to form the
     5 and 6 terms (no 16-bit truncation is applied here).*/
  b5=OC_C4S4*a4>>16;
  b6=OC_C4S4*a7>>16;
  /*Stage 3.*/
  c1=a0+a2;
  c2=a0-a2;
  c0=a0+a3;
  c3=a0-a3;
  c6=b6+b5;
  c5=b6-b5;
  /*Stage 4: the final butterflies, written out with a stride of 8.*/
  _y[0*8]=(ogg_int16_t)(c0+a7);
  _y[1*8]=(ogg_int16_t)(c1+c6);
  _y[2*8]=(ogg_int16_t)(c2+c5);
  _y[3*8]=(ogg_int16_t)(c3+a4);
  _y[4*8]=(ogg_int16_t)(c3-a4);
  _y[5*8]=(ogg_int16_t)(c2-c5);
  _y[6*8]=(ogg_int16_t)(c1-c6);
  _y[7*8]=(ogg_int16_t)(c0-a7);
}
/*Reduced 8x8 inverse DCT for blocks prepared by dequant_slow10(), which
   stores only the first 10 coefficients.
  InputData:   The quantized coefficients.
  QuantMatrix: The dequantization factors handed to dequant_slow10().
  OutputData:  Receives the 64 results.
  Only the first four rows are transformed in the row pass, and both passes
   use only the first four inputs of each row/column.*/
void IDct10( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
ogg_int32_t IntermediateData[64];
ogg_int32_t * ip = IntermediateData;
ogg_int16_t * op = OutputData;
ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
ogg_int32_t t1, t2;
int loop;
dequant_slow10( QuantMatrix, InputData, IntermediateData);
/* Inverse DCT on the rows now */
/* Only rows 0-3 are visited. */
for ( loop = 0; loop < 4; loop++){
/* Check for non-zero values */
if ( ip[0] | ip[1] | ip[2] | ip[3] ){
t1 = (xC1S7 * ip[1]);
t1 >>= 16;
_A = t1;
t1 = (xC7S1 * ip[1]);
t1 >>= 16;
_B = t1 ;
t1 = (xC3S5 * ip[3]);
t1 >>= 16;
_C = t1;
t2 = (xC5S3 * ip[3]);
t2 >>= 16;
_D = -t2;
t1 = (xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (xC4S4 * ip[0] );
t1 >>= 16;
/* With input 4 taken as zero the sum and difference terms coincide. */
_E = t1;
_F = t1;
t1 = (xC2S6 * ip[2]);
t1 >>= 16;
_G = t1;
t1 = (xC6S2 * ip[2]);
t1 >>= 16;
_H = t1 ;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
/* Final sequence of operations over-write original inputs. */
/* All eight entries of the row are written, truncated to 16 bits. */
ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
}
ip += 8; /* next row */
}
/* Column pass over all eight columns; each column reads rows 0-3 only. */
ip = IntermediateData;
for ( loop = 0; loop < 8; loop++) {
/* Check for non-zero values (bitwise or faster than ||) */
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) {
t1 = (xC1S7 * ip[1*8]);
t1 >>= 16;
_A = t1 ;
t1 = (xC7S1 * ip[1*8]);
t1 >>= 16;
_B = t1 ;
t1 = (xC3S5 * ip[3*8]);
t1 >>= 16;
_C = t1 ;
t2 = (xC5S3 * ip[3*8]);
t2 >>= 16;
_D = - t2;
t1 = (xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (xC4S4 * ip[0*8]);
t1 >>= 16;
_E = t1;
_F = t1;
t1 = (xC2S6 * ip[2*8]);
t1 >>= 16;
_G = t1;
t1 = (xC6S2 * ip[2*8]);
t1 >>= 16;
_H = t1;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
/* Bias so that the >> 4 below rounds instead of truncating. */
_Gd += IdctAdjustBeforeShift;
_Add += IdctAdjustBeforeShift;
_Ed += IdctAdjustBeforeShift;
_Fd += IdctAdjustBeforeShift;
/* Final sequence of operations over-write original inputs. */
op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
}else{
/* An all-zero column produces an all-zero output column. */
op[0*8] = 0;
op[7*8] = 0;
op[1*8] = 0;
op[2*8] = 0;
op[3*8] = 0;
op[4*8] = 0;
op[5*8] = 0;
op[6*8] = 0;
}
ip++; /* next column */
op++;
}
/*One-dimensional inverse 8-point Type-II DCT for inputs whose last six
   coefficients are zero.
  The results are twice as large as the orthonormal transform would give.
  _y: Destination.
      The eight results are written with a stride of 8 elements (e.g. down one
       column of an 8x8 block).
  _x: Source coefficients.
      Only _x[0] and _x[1] are read; the remaining six are taken to be 0.*/
static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  ogg_int32_t a0;
  ogg_int32_t a4;
  ogg_int32_t a7;
  ogg_int32_t b5;
  ogg_int32_t b6;
  ogg_int32_t c5;
  ogg_int32_t c6;
  /*Stage 1: the DC term and the two products of the second coefficient.*/
  a0=OC_C4S4*_x[0]>>16;
  a4=OC_C7S1*_x[1]>>16;
  a7=OC_C1S7*_x[1]>>16;
  /*Stage 2: scaled copies of a4 and a7.*/
  b5=OC_C4S4*a4>>16;
  b6=OC_C4S4*a7>>16;
  /*Stage 3: the 6-5 butterfly.*/
  c6=b6+b5;
  c5=b6-b5;
  /*Stage 4: every output is the DC term plus or minus one odd term.*/
  _y[0*8]=(ogg_int16_t)(a0+a7);
  _y[1*8]=(ogg_int16_t)(a0+c6);
  _y[2*8]=(ogg_int16_t)(a0+c5);
  _y[3*8]=(ogg_int16_t)(a0+a4);
  _y[4*8]=(ogg_int16_t)(a0-a4);
  _y[5*8]=(ogg_int16_t)(a0-c5);
  _y[6*8]=(ogg_int16_t)(a0-c6);
  _y[7*8]=(ogg_int16_t)(a0-a7);
}
/***************************
x 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
**************************/
/*Inverse DCT for a block in which only the first coefficient is used.
  The product InputData[0]*QuantMatrix[0] is biased by 15, shifted right by 5
   and the resulting value is stored in all 64 entries of OutputData.*/
void IDct1( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
int loop;
ogg_int16_t OutD;
OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
for(loop=0;loop<64;loop++)
OutputData[loop]=OutD;
/*One-dimensional inverse 8-point Type-II DCT for inputs that consist of a
   single (DC) coefficient.
  The results are twice as large as the orthonormal transform would give.
  _y: Destination.
      The same value is written to eight entries with a stride of 8 elements
       (e.g. down one column of an 8x8 block).
  _x: Source coefficients.
      Only _x[0] is read; the other seven are taken to be 0.*/
static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
  ogg_int16_t dc;
  int         i;
  /*Compute the value once, before any output is stored.*/
  dc=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
  /*Fill from the last entry to the first, the order in which the chained
     assignment this replaces stored its results.*/
  for(i=8;i-->0;)_y[i<<3]=dc;
}
/*Inverse 8x8 Type-II DCT for blocks in which only the first 3 coefficients in
   zig-zag order can be non-zero:
   x x 0 0 0 0 0 0
   x 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
  The input is assumed to be scaled by a factor of 4 relative to the
   orthonormal version of the transform.
  _y: Receives the 64 results.
      This may be the same buffer as _x.
  _x: The input coefficients.*/
static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
  ogg_int16_t w[64];
  int         i;
  /*Row pass: rows 0 and 1 of _x become columns 0 and 1 of w.
    The other columns of w are never read below.*/
  idct8_2(w,_x);
  idct8_1(w+1,_x+8);
  /*Column pass: row i of w becomes column i of _y.*/
  for(i=0;i<8;i++)idct8_2(_y+i,w+8*i);
  /*Remove the scale factor, rounding to nearest.*/
  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
}
/*Inverse 8x8 Type-II DCT for blocks in which only the first 10 coefficients
   in zig-zag order can be non-zero:
   x x x x 0 0 0 0
   x x x 0 0 0 0 0
   x x 0 0 0 0 0 0
   x 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0
  The input is assumed to be scaled by a factor of 4 relative to the
   orthonormal version of the transform.
  _y: Receives the 64 results.
      This may be the same buffer as _x.
  _x: The input coefficients.*/
static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
  ogg_int16_t w[64];
  int         i;
  /*Row pass: rows 0...3 of _x become columns 0...3 of w, each with the
     smallest transform that covers its non-zero entries.
    The other columns of w are never read below.*/
  idct8_4(w,_x);
  idct8_3(w+1,_x+8);
  idct8_2(w+2,_x+16);
  idct8_1(w+3,_x+24);
  /*Column pass: row i of w becomes column i of _y.*/
  for(i=0;i<8;i++)idct8_4(_y+i,w+8*i);
  /*Remove the scale factor, rounding to nearest.*/
  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
}
/*General inverse 8x8 Type-II DCT; no coefficient is assumed to be zero.
  The input is assumed to be scaled by a factor of 4 relative to the
   orthonormal version of the transform.
  _y: Receives the 64 results.
      This may be the same buffer as _x.
  _x: The input coefficients.*/
static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
  ogg_int16_t w[64];
  int         i;
  /*Row pass: row i of _x becomes column i of w.*/
  for(i=0;i<8;i++)idct8(w+i,_x+8*i);
  /*Column pass: row i of w becomes column i of _y.*/
  for(i=0;i<8;i++)idct8(_y+i,w+8*i);
  /*Remove the scale factor, rounding to nearest.*/
  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
}
/*Runs the inverse 8x8 DCT selected for this codec state.
  The call is forwarded to the idct8x8 entry of the state's opt_vtable.
  _state:    The state whose function table is used.
  _y:        The block, passed through to the selected implementation.
  _last_zzi: Passed through to the selected implementation.*/
void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],
 int _last_zzi){
  _state->opt_vtable.idct8x8(_y,_last_zzi);
}
/*Portable C implementation of the inverse 8x8 Type-II DCT, done in place.
  The input is assumed to be scaled by a factor of 4 relative to the
   orthonormal version of the transform.
  _y:        The 64 coefficients on input, the 64 results on output.
  _last_zzi: The zig-zag index reached before the final token of the block was
              decoded; it selects how small a transform may be used.*/
void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){
  /*_last_zzi is not quite the number of coefficients decoded for this block:
     it holds the value of zzi BEFORE the final token in the block was
     decoded.
    Usually that final token is an EOB token (the continuation of an EOB run
     from a previous block counts as one), and then the two are the same.
    They differ when the last token was NOT an EOB token but filled the block
     up with exactly 64 coefficients; _last_zzi is then less than 64.
    As long as that last token was not a pure zero run, _last_zzi is at least
     46, which does not change any decision made in this routine.
    If the last token WAS a pure zero run of length 63, however, _last_zzi is
     1 although 64 coefficients were decoded, so the smallest special case
     below is taken where the real coefficient count would not select it.
    Likewise a zero run of length 64 gives _last_zzi a value of 0, yet the DC
     coefficient is still processed, because DC prediction may have made it
     non-zero.
    Convoluted as it is, this is arguably the right behavior: a block that
     ends in a long zero run instead of a normal EOB token still gets the
     smaller transform.
    It could be smarter: several separate zero runs at the end of a block
     defeat the shortcut, but an encoder that produces these really deserves
     what it gets.
    Needless to say, this approach was inherited from VP3.*/
  /*Pick the smallest transform that _last_zzi allows; all run in place.*/
  if(_last_zzi>=10)oc_idct8x8_slow(_y,_y);
  else if(_last_zzi>=3)oc_idct8x8_10(_y,_y);
  else oc_idct8x8_3(_y,_y);
}

View File

@ -0,0 +1,131 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "internal.h"
/*Tests whether a comment string starts with the given tag name followed by
   '=', ignoring case.
  This is more or less the same as strncasecmp, but that doesn't exist
   everywhere, and this is a fairly trivial function, so we include it.
  Note: We take advantage of the fact that we know _n is less than or equal to
   the length of at least one of the strings, so a shorter string always
   produces a mismatch at its terminator before anything past it is read.
  _s1: The comment string ("TAG=value").
  _s2: The tag name to look for.
  _n:  The number of characters of the tag name to compare.
  Return: 0 if the first _n characters match and _s1[_n] is '=', and a
   non-zero value otherwise.*/
static int oc_tagcompare(const char *_s1,const char *_s2,int _n){
  int c;
  for(c=0;c<_n;c++){
    /*toupper() is only defined for values representable as unsigned char (or
       EOF); plain char may be negative for bytes >= 0x80, so convert first.*/
    if(toupper((unsigned char)_s1[c])!=toupper((unsigned char)_s2[c])){
      return !0;
    }
  }
  return _s1[c]!='=';
}
/*Resets a th_info structure to its initial state.
  Every byte is cleared, then the bitstream version fields are set to the
   version this library implements and keyframe_granule_shift is set to 6.
  _info: The structure to initialize.*/
void th_info_init(th_info *_info){
  memset(_info,0,sizeof(th_info));
  /*The only fields that do not start out as zero.*/
  _info->keyframe_granule_shift=6;
  _info->version_subminor=TH_VERSION_SUB;
  _info->version_minor=TH_VERSION_MINOR;
  _info->version_major=TH_VERSION_MAJOR;
}
/*Clears a th_info structure by setting every byte of it to zero.
  _info: The structure to clear.*/
void th_info_clear(th_info *_info){
  memset(_info,0,sizeof(th_info));
}
/*Initializes a th_comment structure to the empty state (all bytes zero).
  _tc: The structure to initialize.*/
void th_comment_init(th_comment *_tc){
  memset(_tc,0,sizeof(th_comment));
}
/*Appends a copy of a comment string to a th_comment structure.
  Both arrays are grown to hold two more entries than are currently stored:
   one for the new comment and one for the NULL that terminates
   user_comments.
  If any allocation fails the function returns without incrementing the
   comment count.
  _tc:      The structure to add the comment to.
  _comment: The NUL-terminated string to copy.*/
void th_comment_add(th_comment *_tc,char *_comment){
  char **comments;
  int   *lengths;
  char  *copy;
  int    len;
  int    slot;
  comments=_ogg_realloc(_tc->user_comments,
   (_tc->comments+2)*sizeof(*_tc->user_comments));
  if(comments==NULL)return;
  /*Store the grown array right away so it is not lost if a later step
     fails.*/
  _tc->user_comments=comments;
  lengths=_ogg_realloc(_tc->comment_lengths,
   (_tc->comments+2)*sizeof(*_tc->comment_lengths));
  if(lengths==NULL)return;
  _tc->comment_lengths=lengths;
  slot=_tc->comments;
  len=strlen(_comment);
  lengths[slot]=len;
  copy=_ogg_malloc(len+1);
  comments[slot]=copy;
  if(copy==NULL)return;
  /*Copy the string including its terminator.*/
  memcpy(copy,_comment,len+1);
  _tc->comments=slot+1;
  comments[slot+1]=NULL;
}
/*Adds a comment of the form "TAG=value" to a th_comment structure.
  The string is assembled in a temporary buffer, handed to th_comment_add()
   (which stores its own copy) and then released.
  Nothing is added if the temporary buffer cannot be allocated.
  _tc:  The structure to add the comment to.
  _tag: The NUL-terminated tag name.
  _val: The NUL-terminated value.*/
void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){
  char *entry;
  char *dst;
  int   tag_len;
  int   val_len;
  tag_len=strlen(_tag);
  val_len=strlen(_val);
  /*Room for the tag, the '=', the value and the terminating '\0'.*/
  entry=_ogg_malloc(tag_len+val_len+2);
  if(entry==NULL)return;
  dst=entry;
  memcpy(dst,_tag,tag_len);
  dst+=tag_len;
  *dst++='=';
  /*The +1 copies the value's terminator as well.*/
  memcpy(dst,_val,val_len+1);
  th_comment_add(_tc,entry);
  _ogg_free(entry);
}
/*Looks up the value of the _count'th comment (counting from 0) whose tag
   matches _tag, ignoring case.
  _tc:    The structure to search.
  _tag:   The NUL-terminated tag name to look for.
  _count: The zero-based index among the matching comments.
  Return: A pointer to the value part of the stored comment (the text after
   the '='), which is NOT a copy, or NULL if there is no such comment.*/
char *th_comment_query(th_comment *_tc,char *_tag,int _count){
  long i;
  int  matches;
  int  tag_len;
  tag_len=strlen(_tag);
  matches=0;
  for(i=0;i<_tc->comments;i++){
    char *entry;
    entry=_tc->user_comments[i];
    /*oc_tagcompare() returns non-zero for entries with a different tag.*/
    if(oc_tagcompare(entry,_tag,tag_len))continue;
    /*Skip over the tag and the '=' to reach the value.*/
    if(matches==_count)return entry+tag_len+1;
    matches++;
  }
  /*No matching comment with that index.*/
  return NULL;
}
/*Counts the comments whose tag matches _tag, ignoring case.
  _tc:  The structure to search.
  _tag: The NUL-terminated tag name to look for.
  Return: The number of matching comments.*/
int th_comment_query_count(th_comment *_tc,char *_tag){
  long i;
  int  tag_len;
  int  matches;
  matches=0;
  tag_len=strlen(_tag);
  for(i=0;i<_tc->comments;i++){
    /*oc_tagcompare() returns 0 on a match, so its negation is 1.*/
    matches+=!oc_tagcompare(_tc->user_comments[i],_tag,tag_len);
  }
  return matches;
}
/*Releases everything a th_comment structure owns and resets it to the empty
   state.
  Each stored comment, both arrays and the vendor string are freed, then all
   bytes of the structure are cleared.
  _tc: The structure to clear; a NULL pointer is accepted and ignored.*/
void th_comment_clear(th_comment *_tc){
  long i;
  if(_tc==NULL)return;
  for(i=0;i<_tc->comments;i++){
    _ogg_free(_tc->user_comments[i]);
  }
  _ogg_free(_tc->user_comments);
  _ogg_free(_tc->comment_lengths);
  _ogg_free(_tc->vendor);
  memset(_tc,0,sizeof(th_comment));
}

View File

@ -0,0 +1,262 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: internal.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include "internal.h"
/*A map from the index in the zig zag scan to the coefficient number in a
block.
All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
past the end of a block in bogus streams get mapped to a known location.
The first 64 entries are the scan order itself; the remaining 64 entries are
all 64.*/
const unsigned char OC_FZIG_ZAG[128]={
0, 1, 8,16, 9, 2, 3,10,
17,24,32,25,18,11, 4, 5,
12,19,26,33,40,48,41,34,
27,20,13, 6, 7,14,21,28,
35,42,49,56,57,50,43,36,
29,22,15,23,30,37,44,51,
58,59,52,45,38,31,39,46,
53,60,61,54,47,55,62,63,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64
};
/*A map from the coefficient number in a block to its index in the zig zag
scan.
This is the inverse of the first 64 entries of OC_FZIG_ZAG, e.g.
OC_FZIG_ZAG[2] is 8 and OC_IZIG_ZAG[8] is 2.*/
const unsigned char OC_IZIG_ZAG[64]={
0, 1, 5, 6,14,15,27,28,
2, 4, 7,13,16,26,29,42,
3, 8,12,17,25,30,41,43,
9,11,18,24,31,40,44,53,
10,19,23,32,39,45,52,54,
20,22,33,38,46,51,55,60,
21,34,37,47,50,56,59,61,
35,36,48,49,57,58,62,63
};
/*A map from physical macro block ordering to bitstream macro block
ordering within a super block.*/
const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}};
/*A list of the indices in the oc_mb.map array that can be valid for each of
the various chroma decimation types.
Rows with fewer than 12 initializers are zero-filled by the compiler; only
the first OC_MB_MAP_NIDXS[format] entries of a row are meaningful.*/
const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
{0,1,2,3,4,8},
{0,1,2,3,4,5,8,9},
{0,1,2,3,4,6,8,10},
{0,1,2,3,4,5,6,7,8,9,10,11}
};
/*The number of indices in the oc_mb.map array that can be valid for each of
the various chroma decimation types.
Each value equals the number of initializers in the matching row of
OC_MB_MAP_IDXS.*/
const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
/*The number of extra bits that are coded with each of the DCT tokens.
Each DCT token has some fixed number of additional bits (possibly 0) stored
after the token itself, containing, for example, coefficient magnitude,
sign bits, etc.
The table is indexed by token number and lists 32 values in rows of 9, 4, 10,
7 and 2 entries.
NOTE(review): the row breaks line up with the token numbering used by the
OC_DCT_*_TOKEN / OC_DCT_VAL_CAT* / OC_DCT_RUN_CAT* macros (tokens 0-8, 9-12,
13-22, 23-29 and 30-31); confirm against the header that defines them.*/
const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={
0,0,0,2,3,4,12,3,6,
0,0,0,0,
1,1,1,1,2,3,4,5,6,10,
1,1,1,1,1,3,4,
2,3
};
/*Counts the bits needed to represent a value.
  _v: The value to measure.
  Return: The position of the highest set bit of _v, counting from 1, or 0 if
   _v is 0 (e.g. 1 for 1, 2 for 2 and 3, 8 for 255).*/
int oc_ilog(unsigned _v){
  int nbits;
  nbits=0;
  /*Shift the value out one bit at a time until nothing is left.*/
  while(_v!=0){
    _v>>=1;
    nbits++;
  }
  return nbits;
}
/*Fills in the chroma plane motion vector of a macro block that has 4
   different motion vectors in the luma plane.
  This version is for chroma decimated in both the X and Y directions
   (4:2:0): the four luma vectors are summed per component and the sums are
   reduced with OC_DIV_ROUND_POW2(sum,2,2) to a single vector, stored in
   _cbmvs[0].
  _cbmvs: The chroma block-level motion vectors to fill in.
  _lbmvs: The luma block-level motion vectors.*/
static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
  int sum_x;
  int sum_y;
  int bi;
  sum_x=0;
  sum_y=0;
  /*All four luma vectors are read before anything is written.*/
  for(bi=0;bi<4;bi++){
    sum_x+=_lbmvs[bi][0];
    sum_y+=_lbmvs[bi][1];
  }
  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(sum_x,2,2);
  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(sum_y,2,2);
}
/*Fills in the chroma plane motion vectors of a macro block that has 4
   different motion vectors in the luma plane.
  This version is for chroma decimated in the Y direction only: luma vectors
   0 and 2 are combined into _cbmvs[0] and luma vectors 1 and 3 into
   _cbmvs[1], each with OC_DIV_ROUND_POW2(sum,1,1).
  _cbmvs: The chroma block-level motion vectors to fill in.
  _lbmvs: The luma block-level motion vectors.*/
static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
  int bi;
  for(bi=0;bi<2;bi++){
    int dx;
    int dy;
    /*Pair luma block bi with luma block bi+2.*/
    dx=_lbmvs[bi][0]+_lbmvs[bi+2][0];
    dy=_lbmvs[bi][1]+_lbmvs[bi+2][1];
    _cbmvs[bi][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
    _cbmvs[bi][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
  }
}
/*Fills in the chroma plane motion vectors of a macro block that has 4
   different motion vectors in the luma plane.
  This version is for chroma decimated in the X direction only (4:2:2): luma
   vectors 0 and 1 are combined into _cbmvs[0] and luma vectors 2 and 3 into
   _cbmvs[2], each with OC_DIV_ROUND_POW2(sum,1,1).
  _cbmvs: The chroma block-level motion vectors to fill in.
  _lbmvs: The luma block-level motion vectors.*/
static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
  int bi;
  for(bi=0;bi<4;bi+=2){
    int dx;
    int dy;
    /*Pair luma block bi with luma block bi+1.*/
    dx=_lbmvs[bi][0]+_lbmvs[bi+1][0];
    dy=_lbmvs[bi][1]+_lbmvs[bi+1][1];
    _cbmvs[bi][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
    _cbmvs[bi][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
  }
}
/*Fills in the chroma plane motion vectors of a macro block that has 4
   different motion vectors in the luma plane.
  This version is for use with no chroma decimation (4:4:4): the four luma
   vectors are copied unchanged.
  _cbmvs: The chroma block-level motion vectors to fill in.
  _lbmvs: The luma block-level motion vectors.*/
static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
  memcpy(_cbmvs,_lbmvs,sizeof(_lbmvs[0])*4);
}
/*A table of functions used to fill in the chroma plane motion vectors for a
macro block when 4 different motion vectors are specified in the luma
plane.
The table has one entry per pixel format (TH_PF_NFORMATS entries), in the
order 00 (decimated in X and Y), 01 (decimated in Y), 10 (decimated in X)
and 11 (no decimation).*/
const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
(oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
(oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
(oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
(oc_set_chroma_mvs_func)oc_set_chroma_mvs11
};
/*Allocates a two-dimensional array as one block of memory.
  The block starts with _height row pointers and is followed by the data for
   _height rows of _width elements each; row pointer i addresses the start of
   row i.
  The data is not initialized.
  _height: The number of rows.
  _width:  The number of elements in a row.
  _sz:     The size of one element in bytes.
  Return: The table of row pointers (release it with oc_free_2d()), or NULL if
   the allocation fails or the total size does not fit in a size_t.*/
void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){
  size_t  maxsz;
  size_t  rowsz;
  size_t  colsz;
  size_t  datsz;
  size_t  i;
  char   *ret;
  char   *datptr;
  void  **p;
  /*Refuse sizes that would wrap around: a wrapped total would yield a block
     that is too small for the row pointers set up below.*/
  maxsz=~(size_t)0;
  if(_height>maxsz/sizeof(void *))return NULL;
  if(_width!=0&&_sz>maxsz/_width)return NULL;
  colsz=_height*sizeof(void *);
  rowsz=_sz*_width;
  if(_height!=0&&rowsz>maxsz/_height)return NULL;
  datsz=rowsz*_height;
  if(datsz>maxsz-colsz)return NULL;
  /*Alloc array and row pointers.*/
  ret=(char *)_ogg_malloc(datsz+colsz);
  if(ret==NULL)return NULL;
  /*Initialize the array: the rows follow the pointer table back to back.*/
  p=(void **)ret;
  datptr=ret+colsz;
  for(i=0;i<_height;i++){
    p[i]=(void *)datptr;
    datptr+=rowsz;
  }
  return (void **)ret;
}
/*Allocates a zero-filled two-dimensional array as one block of memory.
  The block starts with _height row pointers and is followed by the data for
   _height rows of _width elements each; row pointer i addresses the start of
   row i.
  _height: The number of rows.
  _width:  The number of elements in a row.
  _sz:     The size of one element in bytes.
  Return: The table of row pointers (release it with oc_free_2d()), or NULL if
   the allocation fails or the total size does not fit in a size_t.*/
void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){
  size_t  maxsz;
  size_t  colsz;
  size_t  rowsz;
  size_t  datsz;
  size_t  i;
  char   *ret;
  char   *datptr;
  void  **p;
  /*Refuse sizes that would wrap around: a wrapped total would yield a block
     that is too small for the row pointers set up below.*/
  maxsz=~(size_t)0;
  if(_height>maxsz/sizeof(void *))return NULL;
  if(_width!=0&&_sz>maxsz/_width)return NULL;
  colsz=_height*sizeof(void *);
  rowsz=_sz*_width;
  if(_height!=0&&rowsz>maxsz/_height)return NULL;
  datsz=rowsz*_height;
  if(datsz>maxsz-colsz)return NULL;
  /*Alloc array and row pointers.*/
  ret=(char *)_ogg_calloc(datsz+colsz,1);
  if(ret==NULL)return NULL;
  /*Initialize the array: the rows follow the pointer table back to back.*/
  p=(void **)ret;
  datptr=ret+colsz;
  for(i=0;i<_height;i++){
    p[i]=(void *)datptr;
    datptr+=rowsz;
  }
  return (void **)ret;
}
/*Releases an array obtained from oc_malloc_2d() or oc_calloc_2d().
  Row pointers and data live in one allocation, so a single free suffices.
  _ptr: The pointer those functions returned.*/
void oc_free_2d(void *_ptr){
  _ogg_free(_ptr);
}
/*Makes _dst describe the same image data as _src with the opposite vertical
   orientation.
  For each of the three planes the width and height are copied, the stride is
   negated, and the data pointer is moved by (1-height) rows of the new
   stride.
  No image data is copied.
  _dst: The destination buffer.
        This can be the same as _src.
  _src: The source buffer.*/
void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
 const th_ycbcr_buffer _src){
  int pli;
  pli=0;
  while(pli<3){
    _dst[pli].width=_src[pli].width;
    _dst[pli].height=_src[pli].height;
    _dst[pli].stride=-_src[pli].stride;
    /*Uses the height and (already negated) stride stored in _dst just above,
       which keeps this correct when _dst and _src are the same buffer.*/
    _dst[pli].data=_src[pli].data
     +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride;
    pli++;
  }
}
/*Returns the library's vendor string (OC_VENDOR_STRING).
  Return: A pointer to a constant string; the caller must not free it.*/
const char *th_version_string(void){
  const char *vendor;
  vendor=OC_VENDOR_STRING;
  return vendor;
}
/*Returns the bitstream version this library implements, packed into one
   number.
  Return: TH_VERSION_MAJOR shifted left by 16, plus TH_VERSION_MINOR shifted
   left by 8, plus TH_VERSION_SUB.*/
ogg_uint32_t th_version_number(void){
  return (TH_VERSION_MAJOR<<16)
   +(TH_VERSION_MINOR<<8)
   +TH_VERSION_SUB;
}
/*Determines the packet type from the top bit of the packet's first byte.
  A packet with no data (0 bytes) is interpreted as a video data packet.
  _op: The packet to examine.
  Return: 1 for a header packet, 0 for a data packet.*/
int th_packet_isheader(ogg_packet *_op){
  if(_op->bytes<=0)return 0;
  return _op->packet[0]>>7;
}
/*Determines the frame type of a video data packet from its first byte.
  A packet with no data (0 bytes) is interpreted as a delta frame.
  _op: The packet to examine.
  Return: 1 for a key frame, 0 for a delta frame, and -1 for a header
   packet.*/
int th_packet_iskeyframe(ogg_packet *_op){
  if(_op->bytes<=0)return 0;
  /*Bit 0x80 marks a header packet.*/
  if(_op->packet[0]&0x80)return -1;
  /*In a data packet a clear 0x40 bit means key frame.*/
  return !(_op->packet[0]&0x40);
}

View File

@ -0,0 +1,509 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: internal.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_internal_H)
# define _internal_H (1)
# include <stdlib.h>
# include <limits.h>
# if defined(HAVE_CONFIG_H)
# include <config.h>
# endif
# include "theora/codec.h"
# include "theora/theora.h"
# if defined(_MSC_VER)
/*Disable missing EMMS warnings.*/
# pragma warning(disable:4799)
/*Thank you Microsoft, I know the order of operations.*/
# pragma warning(disable:4554)
# endif
/*You, too, gcc.*/
# if defined(__GNUC_PREREQ)
# if __GNUC_PREREQ(4,2)
# pragma GCC diagnostic ignored "-Wparentheses"
# endif
# endif
# include "ocintrin.h"
# include "huffman.h"
# include "quant.h"
/*Some assembly constructs require aligned operands.
OC_ALIGN8(expr) and OC_ALIGN16(expr) wrap a declaration.
An alignment of 8 or 16 is only requested when OC_X86_ASM is defined and the
compiler is GCC-compatible (__GNUC__) or MSVC (_MSC_VER).*/
# if defined(OC_X86_ASM)
# if defined(__GNUC__)
# define OC_ALIGN8(expr) expr __attribute__((aligned(8)))
# define OC_ALIGN16(expr) expr __attribute__((aligned(16)))
# elif defined(_MSC_VER)
# define OC_ALIGN8(expr) __declspec (align(8)) expr
# define OC_ALIGN16(expr) __declspec (align(16)) expr
# endif
# endif
/*Fallback for every other configuration: the wrapped declaration is passed
through unchanged.*/
# if !defined(OC_ALIGN8)
# define OC_ALIGN8(expr) expr
# endif
# if !defined(OC_ALIGN16)
# define OC_ALIGN16(expr) expr
# endif
typedef struct oc_sb_flags oc_sb_flags;
typedef struct oc_border_info oc_border_info;
typedef struct oc_fragment oc_fragment;
typedef struct oc_fragment_plane oc_fragment_plane;
typedef struct oc_base_opt_vtable oc_base_opt_vtable;
typedef struct oc_base_opt_data oc_base_opt_data;
typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
typedef struct oc_theora_state oc_theora_state;
/*This library's version.*/
# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)"
/*Theora bitstream version.*/
# define TH_VERSION_MAJOR (3)
# define TH_VERSION_MINOR (2)
# define TH_VERSION_SUB (1)
/*Evaluates to non-zero if the version stored in _info (version_major,
version_minor, version_subminor) is at least _maj._min._sub, comparing the
major number first, then the minor, then the subminor.
_info is evaluated more than once, so it should have no side effects.*/
# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \
((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
(_info)->version_subminor>=(_sub)))
/*A keyframe.*/
#define OC_INTRA_FRAME (0)
/*A predicted frame.*/
#define OC_INTER_FRAME (1)
/*A frame of unknown type (frame type decision has not yet been made).*/
#define OC_UNKWN_FRAME (-1)
/*The amount of padding to add to the reconstructed frame buffers on all
sides.
This is used to allow unrestricted motion vectors without special casing.
This must be a multiple of 2.*/
#define OC_UMV_PADDING (16)
/*Frame classification indices.*/
/*The previous golden frame.*/
#define OC_FRAME_GOLD (0)
/*The previous frame.*/
#define OC_FRAME_PREV (1)
/*The current frame.*/
#define OC_FRAME_SELF (2)
/*The input or output buffer.*/
#define OC_FRAME_IO (3)
/*Macroblock modes.*/
/*Macro block is invalid: It is never coded.*/
#define OC_MODE_INVALID (-1)
/*Encoded difference from the same macro block in the previous frame.*/
#define OC_MODE_INTER_NOMV (0)
/*Encoded with no motion compensated prediction.*/
#define OC_MODE_INTRA (1)
/*Encoded difference from the previous frame offset by the given motion
vector.*/
#define OC_MODE_INTER_MV (2)
/*Encoded difference from the previous frame offset by the last coded motion
vector.*/
#define OC_MODE_INTER_MV_LAST (3)
/*Encoded difference from the previous frame offset by the second to last
coded motion vector.*/
#define OC_MODE_INTER_MV_LAST2 (4)
/*Encoded difference from the same macro block in the previous golden
frame.*/
#define OC_MODE_GOLDEN_NOMV (5)
/*Encoded difference from the previous golden frame offset by the given motion
vector.*/
#define OC_MODE_GOLDEN_MV (6)
/*Encoded difference from the previous frame offset by the individual motion
vectors given for each block.*/
#define OC_MODE_INTER_MV_FOUR (7)
/*The number of (coded) modes.*/
#define OC_NMODES (8)
/*Determines the reference frame used for a given MB mode.*/
#define OC_FRAME_FOR_MODE(_x) \
OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
/*Constants for the packet state machine common between encoder and decoder.*/
/*Next packet to emit/read: Codec info header.*/
#define OC_PACKET_INFO_HDR (-3)
/*Next packet to emit/read: Comment header.*/
#define OC_PACKET_COMMENT_HDR (-2)
/*Next packet to emit/read: Codec setup header.*/
#define OC_PACKET_SETUP_HDR (-1)
/*No more packets to emit/read.*/
#define OC_PACKET_DONE (INT_MAX)
/*Super blocks are 32x32 segments of pixels in a single color plane indexed
in image order.
Internally, super blocks are broken up into four quadrants, each of which
contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
Quadrants, and the blocks within them, are indexed in a special order called
a "Hilbert curve" within the super block.
In order to differentiate between the Hilbert-curve indexing strategy and
the regular image order indexing strategy, blocks indexed in image order
are called "fragments".
Fragments are indexed in image order, left to right, then bottom to top,
from Y' plane to Cb plane to Cr plane.
The co-located fragments in all image planes corresponding to the location
of a single quadrant of a luma plane super block form a macro block.
Thus there is only a single set of macro blocks for all planes, each of which
contains between 6 and 12 fragments, depending on the pixel format.
Therefore macro block information is kept in a separate set of arrays from
super blocks to avoid unused space in the other planes.
The lists are indexed in super block order.
That is, the macro block corresponding to the macro block mbi in (luma plane)
super block sbi is at index (sbi<<2|mbi).
Thus the number of macro blocks in each dimension is always twice the number
of super blocks, even when only an odd number fall inside the coded frame.
These "extra" macro blocks are just an artifact of our internal data layout,
and not part of the coded stream; they are flagged with a negative MB mode.*/
/*A single quadrant of the map from a super block to fragment numbers.*/
typedef ptrdiff_t oc_sb_map_quad[4];
/*A map from a super block to fragment numbers.*/
typedef oc_sb_map_quad oc_sb_map[4];
/*A single plane of the map from a macro block to fragment numbers.*/
typedef ptrdiff_t oc_mb_map_plane[4];
/*A map from a macro block to fragment numbers.*/
typedef oc_mb_map_plane oc_mb_map[3];
/*A motion vector.*/
typedef signed char oc_mv[2];
/*Super block information.
Three bit-fields describing the coding state of one super block.*/
struct oc_sb_flags{
/*NOTE(review): presumably set when every block in the super block is coded;
confirm against the decoder.*/
unsigned char coded_fully:1;
/*NOTE(review): presumably set when only some blocks in the super block are
coded; confirm against the decoder.*/
unsigned char coded_partially:1;
/*Four bits wide; presumably one validity bit per quadrant of the super block
(a super block has four quadrants) -- TODO confirm.*/
unsigned char quad_valid:4;
};
/*Information about a fragment which intersects the border of the displayable
region.
This marks which pixels belong to the displayable region.
Instances are stored in the borders array of oc_theora_state.*/
struct oc_border_info{
/*A bit mask marking which pixels are in the displayable region.
Pixel (x,y) corresponds to bit (y<<3|x), so the 64 bits cover one 8x8
fragment.*/
ogg_int64_t mask;
/*The number of pixels in the displayable region.
This is always positive, and always less than 64.*/
int npixels;
};
/*Fragment information.
One instance describes a single fragment (an 8x8 block indexed in image
order).
The declared bit-field widths total 32 bits (1+1+6+3+5+16).*/
struct oc_fragment{
/*A flag indicating whether or not this fragment is coded.*/
unsigned coded:1;
/*A flag indicating that this entire fragment lies outside the displayable
region of the frame.
Note the contrast with an invalid macro block, which is outside the coded
frame, not just the displayable one.
There are no fragments outside the coded frame by construction.*/
unsigned invalid:1;
/*The index of the quality index used for this fragment's AC coefficients.*/
unsigned qii:6;
/*The mode of the macroblock this fragment belongs to.
This is an unsigned 3-bit field, so it can hold the coded OC_MODE_* values
0...7 but not OC_MODE_INVALID (-1).*/
unsigned mb_mode:3;
/*The index of the associated border information for fragments which lie
partially outside the displayable region.
For fragments completely inside or outside this region, this is -1.
Note that the C standard requires an explicit signed keyword for bitfield
types, since some compilers may treat them as unsigned without it.*/
signed int borderi:5;
/*The prediction-corrected DC component.
Note that the C standard requires an explicit signed keyword for bitfield
types, since some compilers may treat them as unsigned without it.*/
signed int dc:16;
};
/*A description of each fragment plane.
oc_theora_state keeps one of these per color plane (fplanes[3]).
Fragment counts and offsets use ptrdiff_t; super block counts and offsets use
unsigned.*/
struct oc_fragment_plane{
/*The number of fragments in the horizontal direction.*/
int nhfrags;
/*The number of fragments in the vertical direction.*/
int nvfrags;
/*The offset of the first fragment in the plane.*/
ptrdiff_t froffset;
/*The total number of fragments in the plane.*/
ptrdiff_t nfrags;
/*The number of super blocks in the horizontal direction.*/
unsigned nhsbs;
/*The number of super blocks in the vertical direction.*/
unsigned nvsbs;
/*The offset of the first super block in the plane.*/
unsigned sboffset;
/*The total number of super blocks in the plane.*/
unsigned nsbs;
};
/*The shared (encoder and decoder) functions that have accelerated variants.
Each member is a function pointer whose signature matches the pure-C default
implementation of the same name with a _c suffix (oc_frag_copy_c(),
oc_idct8x8_c(), etc.) declared later in this header.
oc_theora_state stores one instance of this table in its opt_vtable member.*/
struct oc_base_opt_vtable{
void (*frag_copy)(unsigned char *_dst,
const unsigned char *_src,int _ystride);
void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
const ogg_int16_t _residue[64]);
void (*frag_recon_inter)(unsigned char *_dst,
const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
void (*state_frag_copy_list)(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
/*Takes no arguments, unlike the other entries.*/
void (*restore_fpu)(void);
};
/*The shared (encoder and decoder) tables that vary according to which variants
of the above functions are used.
oc_theora_state stores one instance in its opt_data member.*/
struct oc_base_opt_data{
/*NOTE(review): presumably the forward zig-zag table (OC_FZIG_ZAG or a variant
matching the selected accelerated functions) -- confirm where this is
initialized.*/
const unsigned char *dct_fzig_zag;
};
/*State information common to both the encoder and decoder.
Initialized by oc_state_init() and released by oc_state_clear().*/
struct oc_theora_state{
/*The stream information.*/
th_info info;
/*Table for shared accelerated functions.*/
oc_base_opt_vtable opt_vtable;
/*Table for shared data used by accelerated functions.*/
oc_base_opt_data opt_data;
/*CPU flags to detect the presence of extended instruction sets.*/
ogg_uint32_t cpu_flags;
/*The fragment plane descriptions.*/
oc_fragment_plane fplanes[3];
/*The list of fragments, indexed in image order.*/
oc_fragment *frags;
/*The offset into the reference frame buffer to the upper-left pixel of
each fragment.*/
ptrdiff_t *frag_buf_offs;
/*The motion vector for each fragment.*/
oc_mv *frag_mvs;
/*The total number of fragments in a single frame.*/
ptrdiff_t nfrags;
/*The list of super block maps, indexed in image order.*/
oc_sb_map *sb_maps;
/*The list of super block flags, indexed in image order.*/
oc_sb_flags *sb_flags;
/*The total number of super blocks in a single frame.*/
unsigned nsbs;
/*The fragments from each color plane that belong to each macro block.
Fragments are stored in image order (left to right then top to bottom).
When chroma components are decimated, the extra fragments have an index of
-1.*/
oc_mb_map *mb_maps;
/*The list of macro block modes.
A negative number indicates the macro block lies entirely outside the
coded frame.*/
signed char *mb_modes;
/*The number of macro blocks in the X direction.*/
unsigned nhmbs;
/*The number of macro blocks in the Y direction.*/
unsigned nvmbs;
/*The total number of macro blocks.*/
size_t nmbs;
/*The list of coded fragments, in coded order.
Uncoded fragments are stored in reverse order from the end of the list.*/
ptrdiff_t *coded_fragis;
/*The number of coded fragments in each plane.*/
ptrdiff_t ncoded_fragis[3];
/*The total number of coded fragments.*/
ptrdiff_t ntotal_coded_fragis;
/*The index of the buffers being used for each OC_FRAME_* reference frame.
There is one entry per OC_FRAME_* classification (GOLD, PREV, SELF, IO).*/
int ref_frame_idx[4];
/*The actual buffers used for the previously decoded frames.*/
th_ycbcr_buffer ref_frame_bufs[4];
/*The storage for the reference frame buffers.*/
unsigned char *ref_frame_data[4];
/*The strides for each plane in the reference frames.*/
int ref_ystride[3];
/*The number of unique border patterns.*/
int nborders;
/*The unique border patterns for all border fragments.
The borderi field of fragments which straddle the border indexes this
list.
borderi is a signed 5-bit field, so its non-negative range 0...15 matches
the 16 entries here.*/
oc_border_info borders[16];
/*The frame number of the last keyframe.*/
ogg_int64_t keyframe_num;
/*The frame number of the current frame.*/
ogg_int64_t curframe_num;
/*The granpos of the current frame.*/
ogg_int64_t granpos;
/*The type of the current frame.*/
unsigned char frame_type;
/*The bias to add to the frame count when computing granule positions.*/
unsigned char granpos_bias;
/*The number of quality indices used in the current frame.*/
unsigned char nqis;
/*The quality indices of the current frame.*/
unsigned char qis[3];
/*The dequantization tables, stored in zig-zag order, and indexed by
qi, pli, qti, and zzi.*/
ogg_uint16_t *dequant_tables[64][3][2];
/*Table storage with the same [64][3][2] shape as dequant_tables, declared
through OC_ALIGN16.
NOTE(review): presumably the backing store that the dequant_tables pointers
refer to -- confirm in the initialization code.*/
OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]);
/*Loop filter strength parameters.*/
unsigned char loop_filter_limits[64];
};
/*The function type used to fill in the chroma plane motion vectors for a
macro block when 4 different motion vectors are specified in the luma
plane.
_cbmvs: The chroma block-level motion vectors to fill in.
_lbmvs: The luma block-level motion vectors.
One function of this type per pixel format is stored in
OC_SET_CHROMA_MVS_TABLE.*/
typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
/*A map from the index in the zig zag scan to the coefficient number in a
block.*/
extern const unsigned char OC_FZIG_ZAG[128];
/*A map from the coefficient number in a block to its index in the zig zag
scan.*/
extern const unsigned char OC_IZIG_ZAG[64];
/*A map from physical macro block ordering to bitstream macro block
ordering within a super block.*/
extern const unsigned char OC_MB_MAP[2][2];
/*A list of the indices in the oc_mb_map array that can be valid for each of
the various chroma decimation types.*/
extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
/*The number of indices in the oc_mb_map array that can be valid for each of
the various chroma decimation types.*/
extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
macro block when 4 different motion vectors are specified in the luma
plane.*/
extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
int oc_ilog(unsigned _v);
void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz);
void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz);
void oc_free_2d(void *_ptr);
void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
const th_ycbcr_buffer _src);
int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
void oc_state_clear(oc_theora_state *_state);
void oc_state_vtable_init_c(oc_theora_state *_state);
void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
int _y0,int _yend);
void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
void oc_state_borders_fill(oc_theora_state *_state,int _refi);
void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
th_ycbcr_buffer _img);
int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
int _pli,int _dx,int _dy);
int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
void oc_state_loop_filter(oc_theora_state *_state,int _frame);
#if defined(OC_DUMP_IMAGES)
int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
const char *_suf);
#endif
/*Shared accelerated functions.*/
void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
const unsigned char *_src,int _ystride);
void oc_frag_recon_intra(const oc_theora_state *_state,
unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]);
void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
void oc_frag_recon_inter2(const oc_theora_state *_state,
unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2,
int _ystride,const ogg_int16_t _residue[64]);
void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi);
void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
void oc_state_frag_copy_list(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,
int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu(const oc_theora_state *_state);
/*Default pure-C implementations.*/
void oc_frag_copy_c(unsigned char *_dst,
const unsigned char *_src,int _src_ystride);
void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
const ogg_int16_t _residue[64]);
void oc_frag_recon_inter_c(unsigned char *_dst,
const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
void oc_state_frag_copy_list_c(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu_c(void);
/*We need a way to call a few encoder functions without introducing a link-time
dependency into the decoder, while still allowing the old alpha API which
does not distinguish between encoder and decoder objects to be used.
We do this by placing a function table at the start of the encoder object
which can dispatch into the encoder library.
We do a similar thing for the decoder in case we ever decide to split off a
common base library.*/
/*Signature of the dispatch entry stored in the clear slot.*/
typedef void (*oc_state_clear_func)(theora_state *_th);
/*Signature of the dispatch entry stored in the control slot.
It takes a request code plus a caller-supplied buffer and its size.*/
typedef int (*oc_state_control_func)(theora_state *th,int _req,
void *_buf,size_t _buf_sz);
/*Signature of the dispatch entry stored in the granule_frame slot.*/
typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
ogg_int64_t _granulepos);
/*Signature of the dispatch entry stored in the granule_time slot.*/
typedef double (*oc_state_granule_time_func)(theora_state *_th,
ogg_int64_t _granulepos);
/*The function table placed at the start of the encoder (and decoder) object.
It lets the old API, which does not distinguish between encoder and decoder
objects, reach the right implementation without a link-time dependency.*/
struct oc_state_dispatch_vtable{
oc_state_clear_func clear;
oc_state_control_func control;
oc_state_granule_frame_func granule_frame;
oc_state_granule_time_func granule_time;
};
#endif

View File

@ -0,0 +1,296 @@
#include "mathops.h"
#include <limits.h>
/*The fastest fallback strategy for platforms with fast multiplication appears
to be based on de Bruijn sequences~\cite{LP98}.
Tests confirmed this to be true even on an ARM11, where it is actually faster
than using the native clz instruction.
Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where
multiplication or table lookups are too expensive.
@UNPUBLISHED{LP98,
author="Charles E. Leiserson and Harald Prokop",
title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
month=Jun,
year=1998,
note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
}*/
/*Lookup table for the 32-bit de Bruijn sequence 0x77CB531: maps the top five
   bits of (2**k)*0x77CB531 back to k.
  It is only needed when a de Bruijn fallback is actually compiled, i.e., when
   OC_ILOG_NODEBRUIJN is not defined and either oc_ilog32() has no CLZ builtin,
   or oc_ilog64() has no CLZ builtin and must split its argument into 32-bit
   halves.
  The inner parentheses are required: without them && binds tighter than ||,
   and the table would also be emitted (unused) when OC_ILOG_NODEBRUIJN is
   defined.*/
#if !defined(OC_ILOG_NODEBRUIJN)&& \
 (!defined(OC_CLZ32)||!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL)
static const unsigned char OC_DEBRUIJN_IDX32[32]={
   0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
  31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
};
#endif
/*Computes the integer binary logarithm of a 32-bit value.
  _v: The value to measure.
  Return: floor(log2(_v))+1, or 0 if _v is 0.*/
int oc_ilog32(ogg_uint32_t _v){
#if defined(OC_CLZ32)
  /*Count leading zeros; the mask forces the result to 0 when _v is 0.*/
  return -!!_v&(OC_CLZ32_OFFS-OC_CLZ32(_v));
#else
/*Two fallbacks without a CLZ builtin.
  The multiplication-free one is branchless; on a Pentium M it tested fastest
   among multiplication-free versions on 1,000,000,000 random 32-bit integers,
   edging out a similar version with branches and a 256-entry LUT version.*/
# if defined(OC_ILOG_NODEBRUIJN)
  int nbits;
  int step;
  /*Binary search for the top set bit: each stage shifts _v down by step if
     anything is set above the tested width, and records step in the result.*/
  nbits=_v>0;
  step=(_v>0xFFFFU)<<4;
  _v>>=step;
  nbits|=step;
  step=(_v>0xFFU)<<3;
  _v>>=step;
  nbits|=step;
  step=(_v>0xFU)<<2;
  _v>>=step;
  nbits|=step;
  step=(_v>3)<<1;
  _v>>=step;
  nbits|=step;
  nbits+=_v>1;
  return nbits;
/*This de Bruijn sequence version is faster if you have a fast multiplier.*/
# else
  int nbits;
  nbits=_v>0;
  /*Smear the top set bit into every lower position...*/
  _v|=_v>>1;
  _v|=_v>>2;
  _v|=_v>>4;
  _v|=_v>>8;
  _v|=_v>>16;
  /*...then reduce to a single bit (a power of two) and look up its index.*/
  _v=(_v>>1)+1;
  return nbits+OC_DEBRUIJN_IDX32[(_v*0x77CB531U>>27)&0x1F];
# endif
#endif
}
/*Computes the integer binary logarithm of a 64-bit value.
  _v: The value to measure; callers are expected to pass a non-negative value.
  Return: floor(log2(_v))+1, or 0 if _v is 0.*/
int oc_ilog64(ogg_int64_t _v){
#if defined(OC_CLZ64)
  /*Count leading zeros; the mask forces the result to 0 when _v is 0.*/
  return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v;
#else
# if defined(OC_ILOG_NODEBRUIJN)
  ogg_uint32_t v;
  int          ret;
  int          m;
  /*Pick the upper or lower 32-bit half, then binary search within it.*/
  ret=_v>0;
  m=(_v>0xFFFFFFFFU)<<5;
  v=(ogg_uint32_t)(_v>>m);
  ret|=m;
  m=(v>0xFFFFU)<<4;
  v>>=m;
  ret|=m;
  m=(v>0xFFU)<<3;
  v>>=m;
  ret|=m;
  m=(v>0xFU)<<2;
  v>>=m;
  ret|=m;
  m=(v>3)<<1;
  v>>=m;
  ret|=m;
  ret+=v>1;
  return ret;
# else
/*If we don't have a 64-bit word, split it into two 32-bit halves.*/
#  if LONG_MAX<9223372036854775807LL
  ogg_uint32_t v;
  int          ret;
  int          m;
  ret=_v>0;
  m=(_v>0xFFFFFFFFU)<<5;
  v=(ogg_uint32_t)(_v>>m);
  ret|=m;
  v|=v>>1;
  v|=v>>2;
  v|=v>>4;
  v|=v>>8;
  v|=v>>16;
  v=(v>>1)+1;
  ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
  return ret;
/*Otherwise do it in one 64-bit operation.*/
#  else
  static const unsigned char OC_DEBRUIJN_IDX64[64]={
     0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
     5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
    63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
    62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
  };
  int ret;
  ret=_v>0;
  _v|=_v>>1;
  _v|=_v>>2;
  _v|=_v>>4;
  _v|=_v>>8;
  _v|=_v>>16;
  _v|=_v>>32;
  _v=(_v>>1)+1;
  /*The product routinely exceeds 63 bits, so multiplying the signed value
     would be signed overflow (undefined behavior).
    Multiply in unsigned long instead: this branch is only compiled when long
     has at least 64 bits, unsigned arithmetic wraps, and the index is taken
     from bits 58...63 of the product either way.*/
  ret+=OC_DEBRUIJN_IDX64[(unsigned long)_v*0x218A392CD3D5DBFUL>>58&0x3F];
  return ret;
#  endif
# endif
#endif
}
/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))
Entry i is the base-2 hyperbolic arctangent step for CORDIC iteration i.
Because of the extra 2**i factor the entries stay near 2**61/log(2) instead of
shrinking toward zero.
oc_bexp64() compensates by doubling its residual between iterations, and
oc_blog64() by shifting entry i right by i bits before using it.
The last two entries are identical: at this precision the scaled value has
converged, so both functions reuse entry 31 for later iterations.*/
static const ogg_int64_t OC_ATANH_LOG2[32]={
0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL,
0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL,
0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL,
0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL,
0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL,
0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL,
0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL,
0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL,
0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL,
0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL,
0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL
};
/*Computes the binary exponential of _z, a log base 2 in Q57 format.
_z: The base-2 logarithm, with 57 fractional bits.
Return: 2**(_z/2**57) as an integer.
0 is returned if the integer part of _z is negative, and
0x7FFFFFFFFFFFFFFF if the integer part is 63 or larger.*/
ogg_int64_t oc_bexp64(ogg_int64_t _z){
ogg_int64_t w;
ogg_int64_t z;
int ipart;
/*Split off the integer part; only the fraction goes through CORDIC.*/
ipart=(int)(_z>>57);
if(ipart<0)return 0;
if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL;
z=_z-OC_Q57(ipart);
if(z){
ogg_int64_t mask;
long wlo;
int i;
/*C doesn't give us 64x64->128 muls, so we use CORDIC.
This is not particularly fast, but it's not being used in time-critical
code; it is very accurate.*/
/*z is the fractional part of the log in Q62 format.
We need 1 bit of headroom since the magnitude can get larger than 1
during the iteration, and a sign bit.*/
z<<=5;
/*w is the exponential in Q61 format (since it also needs headroom and can
get as large as 2.0); we could get another bit if we dropped the sign,
but we'll recover that bit later anyway.
Ideally this should start out as
\lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
but in order to guarantee convergence we have to repeat iterations 4,
13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
w=0x26A3D0E401DD846DLL;
/*In every iteration below, mask is all ones when z is negative and zero
otherwise, so (x+mask^mask) evaluates to -x or x: the step direction
follows the sign of the residual z.*/
for(i=0;;i++){
mask=-(z<0);
w+=(w>>i+1)+mask^mask;
z-=OC_ATANH_LOG2[i]+mask^mask;
/*Repeat iteration 4.*/
if(i>=3)break;
z<<=1;
}
for(;;i++){
mask=-(z<0);
w+=(w>>i+1)+mask^mask;
z-=OC_ATANH_LOG2[i]+mask^mask;
/*Repeat iteration 13.*/
if(i>=12)break;
z<<=1;
}
for(;i<32;i++){
mask=-(z<0);
w+=(w>>i+1)+mask^mask;
z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1;
}
wlo=0;
/*Skip the remaining iterations unless we really require that much
precision.
We could have bailed out earlier for smaller iparts, but that would
require initializing w from a table, as the limit doesn't converge to
61-bit precision until n=30.*/
if(ipart>30){
/*For these iterations, we just update the low bits, as the high bits
can't possibly be affected.
OC_ATANH_LOG2 has also converged (it actually did so one iteration
earlier, but that's no reason for an extra special case).*/
for(;;i++){
mask=-(z<0);
wlo+=(w>>i)+mask^mask;
z-=OC_ATANH_LOG2[31]+mask^mask;
/*Repeat iteration 40.*/
if(i>=39)break;
z<<=1;
}
for(;i<61;i++){
mask=-(z<0);
wlo+=(w>>i)+mask^mask;
z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1;
}
}
/*Promote w from Q61 to Q62 and fold in the low-order correction.*/
w=(w<<1)+wlo;
}
/*A zero fraction means the result is exactly 2**ipart: 1.0 in Q62.*/
else w=(ogg_int64_t)1<<62;
/*Scale the Q62 value by 2**ipart, rounding to nearest: shift down to one
extra fractional bit, add 1, and drop that bit.
For ipart==62 the Q62 value already is the result.*/
if(ipart<62)w=(w>>61-ipart)+1>>1;
return w;
}
/*Computes the binary logarithm of _w, returned in Q57 format.
_w: The value to take the logarithm of.
Return: log2(_w) with 57 fractional bits, or -1 if _w is zero or negative.*/
ogg_int64_t oc_blog64(ogg_int64_t _w){
ogg_int64_t z;
int ipart;
if(_w<=0)return -1;
/*ipart is floor(log2(_w)); normalize _w so that its top set bit is bit 61,
i.e., a Q61 value in [1,2).*/
ipart=OC_ILOGNZ_64(_w)-1;
if(ipart>61)_w>>=ipart-61;
else _w<<=61-ipart;
z=0;
/*_w&(_w-1) is zero for an exact power of two, whose fractional log is zero,
so the CORDIC iteration is skipped in that case.*/
if(_w&_w-1){
ogg_int64_t x;
ogg_int64_t y;
ogg_int64_t u;
ogg_int64_t mask;
int i;
/*C doesn't give us 64x64->128 muls, so we use CORDIC.
This is not particularly fast, but it's not being used in time-critical
code; it is very accurate.*/
/*z is the fractional part of the log in Q61 format.*/
/*x and y are the cosh() and sinh(), respectively, in Q61 format.
We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/
x=_w+((ogg_int64_t)1<<61);
y=_w-((ogg_int64_t)1<<61);
/*In every iteration below, mask is all ones when y is negative and zero
otherwise, so (v+mask^mask) evaluates to -v or v: each step drives y
toward zero and accumulates the matching signed table entry into z.*/
for(i=0;i<4;i++){
mask=-(y<0);
z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
u=x>>i+1;
x-=(y>>i+1)+mask^mask;
y-=u+mask^mask;
}
/*Repeat iteration 4.*/
for(i--;i<13;i++){
mask=-(y<0);
z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
u=x>>i+1;
x-=(y>>i+1)+mask^mask;
y-=u+mask^mask;
}
/*Repeat iteration 13.*/
for(i--;i<32;i++){
mask=-(y<0);
z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
u=x>>i+1;
x-=(y>>i+1)+mask^mask;
y-=u+mask^mask;
}
/*OC_ATANH_LOG2 has converged.*/
for(;i<40;i++){
mask=-(y<0);
z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
u=x>>i+1;
x-=(y>>i+1)+mask^mask;
y-=u+mask^mask;
}
/*Repeat iteration 40.*/
for(i--;i<62;i++){
mask=-(y<0);
z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
u=x>>i+1;
x-=(y>>i+1)+mask^mask;
y-=u+mask^mask;
}
/*Round the Q61 fraction to Q57.*/
z=z+8>>4;
}
return OC_Q57(ipart)+z;
}

View File

@ -0,0 +1,141 @@
#if !defined(_mathops_H)
# define _mathops_H (1)
# include <ogg/ogg.h>
/*Select count-leading-zeros builtins for 32-bit and 64-bit operands.
This is only attempted when the __GNUC_PREREQ macro exists and reports GCC 3.4
or later; otherwise OC_CLZ32 and OC_CLZ64 stay undefined and the portable C
implementations are used instead.
For each width, the narrowest of unsigned, unsigned long and (64-bit only)
unsigned long long that can hold the value is chosen.
OC_CLZ*_OFFS is the bit width of the chosen type, so OC_CLZ*_OFFS-OC_CLZ*(x)
is the position of the highest set bit of x plus one.*/
# ifdef __GNUC_PREREQ
# if __GNUC_PREREQ(3,4)
# include <limits.h>
/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from
"upgrading" the type of an entire expression to an (unsigned) size_t.*/
# if INT_MAX>=2147483647
# define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
# define OC_CLZ32(_x) (__builtin_clz(_x))
# elif LONG_MAX>=2147483647L
# define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
# define OC_CLZ32(_x) (__builtin_clzl(_x))
# endif
# if INT_MAX>=9223372036854775807LL
# define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
# define OC_CLZ64(_x) (__builtin_clz(_x))
# elif LONG_MAX>=9223372036854775807LL
# define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
# define OC_CLZ64(_x) (__builtin_clzl(_x))
# elif LLONG_MAX>=9223372036854775807LL|| \
__LONG_LONG_MAX__>=9223372036854775807LL
# define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
# define OC_CLZ64(_x) (__builtin_clzll(_x))
# endif
# endif
# endif
/**
* oc_ilog32 - Integer binary logarithm of a 32-bit value.
* @_v: A 32-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin
* function instead, which should be faster.
*/
int oc_ilog32(ogg_uint32_t _v);
/**
* oc_ilog64 - Integer binary logarithm of a 64-bit value.
* @_v: A 64-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin
* function instead, which should be faster.
*/
int oc_ilog64(ogg_int64_t _v);
# if defined(OC_CLZ32)
/**
* OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
* @_v: A non-zero 32-bit value.
* Returns floor(log2(_v))+1.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* If _v is zero, the return value is undefined; use OC_ILOG_32() instead.
*/
# define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v))
/**
* OC_ILOG_32 - Integer binary logarithm of a 32-bit value.
* @_v: A 32-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
*/
# define OC_ILOG_32(_v) (OC_ILOGNZ_32(_v)&-!!(_v))
# else
# define OC_ILOGNZ_32(_v) (oc_ilog32(_v))
# define OC_ILOG_32(_v) (oc_ilog32(_v))
# endif
# if defined(CLZ64)
/**
* OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
* @_v: A non-zero 64-bit value.
* Returns floor(log2(_v))+1.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* If _v is zero, the return value is undefined; use OC_ILOG_64() instead.
*/
# define OC_ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v))
/**
* OC_ILOG_64 - Integer binary logarithm of a 64-bit value.
* @_v: A 64-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
*/
# define OC_ILOG_64(_v) (OC_ILOGNZ_64(_v)&-!!(_v))
# else
# define OC_ILOGNZ_64(_v) (oc_ilog64(_v))
# define OC_ILOG_64(_v) (oc_ilog64(_v))
# endif
/*Helper ladder for the OC_STATIC_ILOG_32() and OC_STATIC_ILOG_64() macros.
OC_STATIC_ILOGk(_v) returns the integer logarithm of a value that fits in
2**k bits (1 bit for k=0, up to 64 bits for k=6).
Each level tests whether anything is set in the upper half of its range; if
so it adds the half-width and recurses on the upper half, otherwise it
recurses on the value unchanged.
_v is expanded many times, so these are meant for constant expressions
only.*/
# define OC_STATIC_ILOG0(_v) (!!(_v))
# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v))
# define OC_STATIC_ILOG2(_v) \
(((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v))
# define OC_STATIC_ILOG3(_v) \
(((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v))
# define OC_STATIC_ILOG4(_v) \
(((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v))
# define OC_STATIC_ILOG5(_v) \
(((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v))
# define OC_STATIC_ILOG6(_v) \
(((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v))
/**
* OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
* @_v: A non-negative 32-bit constant.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* This macro is suitable for evaluation at compile time, but it should not be
* used on values that can change at runtime, as it operates via exhaustive
* search.
*/
# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v)))
/**
* OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
* @_v: A non-negative 64-bit constant.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* This macro is suitable for evaluation at compile time, but it should not be
* used on values that can change at runtime, as it operates via exhaustive
* search.
*/
# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v)))
/*Converts the integer _v to Q57 fixed point (57 fractional bits) by widening
it to ogg_int64_t and shifting left by 57.*/
#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57)
/*Computes the binary exponential of _z, a log base 2 in Q57 format.*/
ogg_int64_t oc_bexp64(ogg_int64_t _z);
/*Computes the binary logarithm of _w, returned in Q57 format.*/
ogg_int64_t oc_blog64(ogg_int64_t _w);
#endif

View File

@ -0,0 +1,767 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id$
********************************************************************/
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include "encint.h"
typedef struct oc_mcenc_ctx oc_mcenc_ctx;
/*Temporary state used for motion estimation.
It is filled in by oc_mcenc_find_candidates().*/
struct oc_mcenc_ctx{
/*The candidate motion vectors.
Entry 0 holds the median predictor; the set A candidates follow, then the
set B candidates.
Each entry is a pair of vector components (presumably x then y -- TODO
confirm).*/
int candidates[13][2];
/*The start of the Set B candidates.*/
int setb0;
/*The total number of candidates.*/
int ncandidates;
};
/*The maximum Y plane SAD value for accepting the median predictor.*/
#define OC_YSAD_THRESH1 (256)
/*The amount to right shift the minimum error by when inflating it for
computing the second maximum Y plane SAD threshold.*/
#define OC_YSAD_THRESH2_SCALE_BITS (4)
/*The amount to add to the second maximum Y plane threshold when inflating
it.*/
#define OC_YSAD_THRESH2_OFFSET (64)
/*The vector offsets in the X direction for each search site in the square
pattern.
Sites are numbered 0...8 in row-major order over a 3x3 grid; site 4 is the
center, with offset (0,0).*/
static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
/*The vector offsets in the Y direction for each search site in the square
pattern.*/
static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
/*The number of sites to search for each boundary condition in the square
pattern.
Bit flags for the boundary conditions are as follows:
1: -16==dx
2: dx==15(.5)
4: -16==dy
8: dy==15(.5)
The table is indexed by the OR of the flags that apply.
Entries 3 and 7 are 0 because flags 1 and 2 cannot both be set; likewise
flags 4 and 8 cannot both be set, so 10 is the largest index needed.*/
static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
/*The list of sites to search for each boundary condition in the square
pattern.
Row b lists the OC_SQUARE_NSITES[b] site numbers (indices into OC_SQUARE_DX
and OC_SQUARE_DY) that stay inside the allowed range for boundary condition
b; the center site 4 never appears.
The rows holding {-1} are placeholders for impossible conditions.*/
static const int OC_SQUARE_SITES[11][8]={
/* -15.5<dx<15(.5), -15.5<dy<15(.5)*/
{0,1,2,3,5,6,7,8},
/*-15.5==dx, -15.5<dy<15(.5)*/
{1,2,5,7,8},
/* dx==15(.5), -15.5<dy<15(.5)*/
{0,1,3,6,7},
/*-15.5==dx==15(.5), -15.5<dy<15(.5)*/
{-1},
/* -15.5<dx<15(.5), -15.5==dy*/
{3,5,6,7,8},
/*-15.5==dx, -15.5==dy*/
{5,7,8},
/* dx==15(.5), -15.5==dy*/
{3,6,7},
/*-15.5==dx==15(.5), -15.5==dy*/
{-1},
/* -15.5<dx<15(.5), dy==15(.5)*/
{0,1,2,3,5},
/*-15.5==dx, dy==15(.5)*/
{1,2,5},
/* dx==15(.5), dy==15(.5)*/
{0,1,3}
};
/*Builds the list of candidate motion vectors to test for one macro block.
  On return the list is laid out as follows:
   candidates[0]:                     the median of the first three Set A
                                       vectors.
   candidates[1..setb0-1]:            Set A: the newest vectors of the
                                       cneighbors, the accumulated vector, the
                                       previous vector plus the accumulator,
                                       and (0,0).
   candidates[setb0..ncandidates-1]:  Set B: accelerated predictors
                                       (2*previous-older+accumulator) for this
                                       macro block and each of its pneighbors.
  Every component is clamped to [-31,31] where it is computed and then passed
   through OC_DIV2() to truncate it to a full-pel position.
  _enc:   The encoder context; only its mb_info array is read.
  _mcenc: The motion estimation context that receives the list.
  _accum: The (x,y) accumulator added to the stored, older vectors.
  _mbi:   The index of the macro block being searched.
  _frame: The reference frame whose stored vectors are used.*/
static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 int _accum[2],int _mbi,int _frame){
  oc_mb_enc_info *embs;
  int             a[3][2];
  int             ncandidates;
  unsigned        nmbi;
  int             i;
  embs=_enc->mb_info;
  /*Skip a position to store the median predictor in.*/
  ncandidates=1;
  /*Fill in the first part of set A: the vectors from adjacent blocks.
    The loop simply does not execute when there are no such neighbors.*/
  for(i=0;i<embs[_mbi].ncneighbors;i++){
    nmbi=embs[_mbi].cneighbors[i];
    _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0];
    _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1];
    ncandidates++;
  }
  /*Add a few additional vectors to set A: the vectors used in the previous
     frames and the (0,0) vector.*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,_accum[0],31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=0;
  _mcenc->candidates[ncandidates][1]=0;
  ncandidates++;
  /*Use the first three vectors of set A to find our best predictor: their
     median.
    The three-exchange sorting network below orders each component
     independently, leaving the median in a[1].*/
  memcpy(a,_mcenc->candidates+1,sizeof(a));
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  OC_SORT2I(a[1][0],a[2][0]);
  OC_SORT2I(a[1][1],a[2][1]);
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  _mcenc->candidates[0][0]=a[1][0];
  _mcenc->candidates[0][1]=a[1][1];
  /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/
  _mcenc->setb0=ncandidates;
  /*The first time through the loop use the current macro block.*/
  nmbi=_mbi;
  for(i=0;;i++){
    /*Read the history of macro block nmbi, not _mbi: otherwise every pass
       would regenerate the current macro block's predictor and the
       pneighbors would contribute nothing but duplicates.*/
    _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
     2*embs[nmbi].analysis_mv[1][_frame][0]
     -embs[nmbi].analysis_mv[2][_frame][0]+_accum[0],31);
    _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
     2*embs[nmbi].analysis_mv[1][_frame][1]
     -embs[nmbi].analysis_mv[2][_frame][1]+_accum[1],31);
    ncandidates++;
    if(i>=embs[_mbi].npneighbors)break;
    nmbi=embs[_mbi].pneighbors[i];
  }
  /*Truncate to full-pel positions.*/
  for(i=0;i<ncandidates;i++){
    _mcenc->candidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]);
    _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]);
  }
  _mcenc->ncandidates=ncandidates;
}
#if 0
/*Disabled SAD counterpart of oc_satd16_halfpel().
Sums oc_enc_frag_sad2_thresh() over the four blocks listed in _fragis, using
the two reference positions _mvoffset0 and _mvoffset1.
Each call is given _best_err minus the error accumulated so far as its
threshold.
Returns the accumulated error.*/
static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
int _mvoffset0,int _mvoffset1,const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _best_err){
unsigned err;
int bi;
err=0;
for(bi=0;bi<4;bi++){
ptrdiff_t frag_offs;
/*Byte offset of this block inside the frame buffers.*/
frag_offs=_frag_buf_offs[_fragis[bi]];
err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
_ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
}
return err;
}
#endif
/*Accumulates the two-reference SATD of the four blocks of a macro block.
  _enc:           The encoder context, forwarded to the SATD routine.
  _frag_buf_offs: Buffer offsets, indexed by fragment index.
  _fragis:        The fragment indices of the four blocks.
  _mvoffset0:     Offset of the first reference position.
  _mvoffset1:     Offset of the second reference position.
  _src:           The source frame data.
  _ref:           The reference frame data.
  _ystride:       The stride passed to the SATD routine.
  _best_err:      The error to beat; each block is given what remains of it as
                   its threshold.
  Return: The accumulated error.*/
static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  unsigned total;
  int      blk;
  total=0;
  blk=0;
  while(blk<4){
    const unsigned char *cur;
    const unsigned char *pred;
    ptrdiff_t            offs;
    offs=_frag_buf_offs[_fragis[blk]];
    cur=_src+offs;
    pred=_ref+offs;
    total+=oc_enc_frag_satd2_thresh(_enc,cur,pred+_mvoffset0,
     pred+_mvoffset1,_ystride,_best_err-total);
    blk++;
  }
  return total;
}
/*Measures the SAD of a full-pel candidate vector over the four blocks of a
   macro block.
  _enc:           The encoder context, forwarded to the SAD routine.
  _frag_buf_offs: Buffer offsets, indexed by fragment index.
  _fragis:        The fragment indices of the four blocks.
  _dx:            The horizontal displacement.
  _dy:            The vertical displacement, in rows of _ystride.
  _src:           The source frame data.
  _ref:           The reference frame data.
  _ystride:       The stride of both buffers.
  _block_err:     Receives the SAD of each individual block.
  Return: The sum of the four block SADs.*/
static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride,
 unsigned _block_err[4]){
  unsigned total;
  int      disp;
  int      blk;
  disp=_dy*_ystride+_dx;
  total=0;
  for(blk=0;blk<4;blk++){
    ptrdiff_t offs;
    offs=_frag_buf_offs[_fragis[blk]];
    _block_err[blk]=oc_enc_frag_sad(_enc,_src+offs,_ref+offs+disp,_ystride);
    total+=_block_err[blk];
  }
  return total;
}
/*Measures the SATD of a full-pel candidate vector over the four blocks of a
   macro block.
  No early termination is requested: UINT_MAX is passed as the threshold.
  _enc:           The encoder context, forwarded to the SATD routine.
  _frag_buf_offs: Buffer offsets, indexed by fragment index.
  _fragis:        The fragment indices of the four blocks.
  _dx:            The horizontal displacement.
  _dy:            The vertical displacement, in rows of _ystride.
  _src:           The source frame data.
  _ref:           The reference frame data.
  _ystride:       The stride of both buffers.
  Return: The sum of the four block SATDs.*/
static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  int disp;
  int total;
  int blk;
  disp=_dy*_ystride+_dx;
  total=0;
  blk=0;
  do{
    ptrdiff_t offs;
    offs=_frag_buf_offs[_fragis[blk]];
    total+=oc_enc_frag_satd_thresh(_enc,
     _src+offs,_ref+offs+disp,_ystride,UINT_MAX);
  }
  while(++blk<4);
  return total;
}
/*Measures the SATD of a full-pel candidate vector for a single block.
  No early termination is requested: UINT_MAX is passed as the threshold.
  _enc:       The encoder context, forwarded to the SATD routine.
  _frag_offs: The buffer offset of the block.
  _dx:        The horizontal displacement.
  _dy:        The vertical displacement, in rows of _ystride.
  _src:       The source frame data.
  _ref:       The reference frame data.
  _ystride:   The stride of both buffers.
  Return: The SATD of the block.*/
static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
 ptrdiff_t _frag_offs,int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  const unsigned char *cur;
  const unsigned char *pred;
  cur=_src+_frag_offs;
  pred=_ref+_frag_offs+_dx+_dy*_ystride;
  return oc_enc_frag_satd_thresh(_enc,cur,pred,_ystride,UINT_MAX);
}
/*Perform a motion vector search for this macro block against a single
reference frame.
As a bonus, individual block motion vectors are computed as well, as much of
the work can be shared.
The actual motion vector is stored in the appropriate place in the
oc_mb_enc_info structure.
The search proceeds in stages, each entered only if the previous one left
the error above a threshold: the median predictor, the Set A candidates, the
Set B candidates, a square pattern search around the best vector, and (for
OC_FRAME_PREV only) a per-block square pattern search.
_enc: The encoder context.
_accum: Drop frame/golden MV accumulators.
_mbi: The macro block index.
_frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.*/
void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){
/*Note: Traditionally this search is done using a rate-distortion objective
function of the form D+lambda*R.
However, xiphmont tested this and found it produced a small degradation,
while requiring extra computation.
This is most likely due to Theora's peculiar MV encoding scheme: MVs are
not coded relative to a predictor, and the only truly cheap way to use a
MV is in the LAST or LAST2 MB modes, which are not being considered here.
Therefore if we use the MV found here, it's only because both LAST and
LAST2 performed poorly, and therefore the MB is not likely to be uniform
or suffer from the aperture problem.
Furthermore we would like to re-use the MV found here for as many MBs as
possible, so picking a slightly sub-optimal vector to save a bit or two
may cause increased degradation in many blocks to come.
We could artificially reduce lambda to compensate, but it's faster to just
disable it entirely, and use D (the distortion) as the sole criterion.*/
oc_mcenc_ctx mcenc;
const ptrdiff_t *frag_buf_offs;
const ptrdiff_t *fragis;
const unsigned char *src;
const unsigned char *ref;
int ystride;
oc_mb_enc_info *embs;
/*Bitmap of the vectors already tested: row candy+15, bit candx+15.*/
ogg_int32_t hit_cache[31];
ogg_int32_t hitbit;
/*The per-block bests are only initialized and used for OC_FRAME_PREV.*/
unsigned best_block_err[4];
unsigned block_err[4];
unsigned best_err;
int best_vec[2];
int best_block_vec[4][2];
int candx;
int candy;
int bi;
embs=_enc->mb_info;
/*Find some candidate motion vectors.*/
oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame);
/*Clear the cache of locations we've examined.*/
memset(hit_cache,0,sizeof(hit_cache));
/*Start with the median predictor.*/
candx=mcenc.candidates[0][0];
candy=mcenc.candidates[0][1];
hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
frag_buf_offs=_enc->state.frag_buf_offs;
fragis=_enc->state.mb_maps[_mbi][0];
src=_enc->state.ref_frame_data[OC_FRAME_IO];
ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
ystride=_enc->state.ref_ystride[0];
/*TODO: customize error function for speed/(quality+size) tradeoff.*/
best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
best_vec[0]=candx;
best_vec[1]=candy;
if(_frame==OC_FRAME_PREV){
/*Seed the per-block bests with the median predictor's results.*/
for(bi=0;bi<4;bi++){
best_block_err[bi]=block_err[bi];
best_block_vec[bi][0]=candx;
best_block_vec[bi][1]=candy;
}
}
/*If this predictor fails, move on to set A.*/
if(best_err>OC_YSAD_THRESH1){
unsigned err;
unsigned t2;
int ncs;
int ci;
/*Compute the early termination threshold for set A.
It is the largest stored error of this macro block and up to three of its
cneighbors, inflated by a fraction of itself plus a constant.*/
t2=embs[_mbi].error[_frame];
ncs=OC_MINI(3,embs[_mbi].ncneighbors);
for(ci=0;ci<ncs;ci++){
t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
}
t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
/*Examine the candidates in set A.*/
for(ci=1;ci<mcenc.setb0;ci++){
candx=mcenc.candidates[ci][0];
candy=mcenc.candidates[ci][1];
/*If we've already examined this vector, then we would be using it if it
was better than what we are using.*/
hitbit=(ogg_int32_t)1<<candx+15;
if(hit_cache[candy+15]&hitbit)continue;
hit_cache[candy+15]|=hitbit;
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
if(err<best_err){
best_err=err;
best_vec[0]=candx;
best_vec[1]=candy;
}
if(_frame==OC_FRAME_PREV){
/*Each block keeps its own best vector, independent of the macro block's.*/
for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
best_block_err[bi]=block_err[bi];
best_block_vec[bi][0]=candx;
best_block_vec[bi][1]=candy;
}
}
}
if(best_err>t2){
/*Examine the candidates in set B.
ci continues from where the set A loop stopped, i.e. at mcenc.setb0.*/
for(;ci<mcenc.ncandidates;ci++){
candx=mcenc.candidates[ci][0];
candy=mcenc.candidates[ci][1];
hitbit=(ogg_int32_t)1<<candx+15;
if(hit_cache[candy+15]&hitbit)continue;
hit_cache[candy+15]|=hitbit;
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
if(err<best_err){
best_err=err;
best_vec[0]=candx;
best_vec[1]=candy;
}
if(_frame==OC_FRAME_PREV){
for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
best_block_err[bi]=block_err[bi];
best_block_vec[bi][0]=candx;
best_block_vec[bi][1]=candy;
}
}
}
/*Use the same threshold for set B as in set A.*/
if(best_err>t2){
int best_site;
int nsites;
int sitei;
int site;
int b;
/*Square pattern search.
Repeatedly test the sites around best_vec and move to the best one, until
the center (site 4) remains the best.*/
for(;;){
best_site=4;
/*Compose the bit flags for boundary conditions.*/
b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
nsites=OC_SQUARE_NSITES[b];
for(sitei=0;sitei<nsites;sitei++){
site=OC_SQUARE_SITES[b][sitei];
candx=best_vec[0]+OC_SQUARE_DX[site];
candy=best_vec[1]+OC_SQUARE_DY[site];
hitbit=(ogg_int32_t)1<<candx+15;
if(hit_cache[candy+15]&hitbit)continue;
hit_cache[candy+15]|=hitbit;
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
if(err<best_err){
/*Only the site is recorded here; best_vec itself must stay fixed while
the remaining sites around it are tested.*/
best_err=err;
best_site=site;
}
if(_frame==OC_FRAME_PREV){
for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
best_block_err[bi]=block_err[bi];
best_block_vec[bi][0]=candx;
best_block_vec[bi][1]=candy;
}
}
}
if(best_site==4)break;
best_vec[0]+=OC_SQUARE_DX[best_site];
best_vec[1]+=OC_SQUARE_DY[best_site];
}
/*Final 4-MV search.*/
/*Simply use 1/4 of the macro block set A and B threshold as the
individual block threshold.*/
if(_frame==OC_FRAME_PREV){
t2>>=2;
for(bi=0;bi<4;bi++){
if(best_block_err[bi]>t2){
/*Square pattern search.
We do this in a slightly interesting manner.
We continue to check the SAD of all four blocks in the
macro block.
This gives us two things:
1) We can continue to use the hit_cache to avoid duplicate
checks.
Otherwise we could continue to read it, but not write to it
without saving and restoring it for each block.
Note that we could still eliminate a large number of
duplicate checks by taking into account the site we came
from when choosing the site list.
We can still do that to avoid extra hit_cache queries, and
it might even be a speed win.
2) It gives us a slightly better chance of escaping local
minima.
We would not be here if we weren't doing a fairly bad job
in finding a good vector, and checking these vectors can
save us from 100 to several thousand points off our SAD 1
in 15 times.
TODO: Is this a good idea?
Who knows.
It needs more testing.*/
for(;;){
int bestx;
int besty;
int bj;
/*Remember the center of this pass so we can tell whether it moved.*/
bestx=best_block_vec[bi][0];
besty=best_block_vec[bi][1];
/*Compose the bit flags for boundary conditions.*/
b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
nsites=OC_SQUARE_NSITES[b];
for(sitei=0;sitei<nsites;sitei++){
site=OC_SQUARE_SITES[b][sitei];
candx=bestx+OC_SQUARE_DX[site];
candy=besty+OC_SQUARE_DY[site];
hitbit=(ogg_int32_t)1<<candx+15;
if(hit_cache[candy+15]&hitbit)continue;
hit_cache[candy+15]|=hitbit;
err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
if(err<best_err){
best_err=err;
best_vec[0]=candx;
best_vec[1]=candy;
}
for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
best_block_err[bj]=block_err[bj];
best_block_vec[bj][0]=candx;
best_block_vec[bj][1]=candy;
}
}
/*Stop once a full pass leaves this block's best vector unchanged.*/
if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
break;
}
}
}
}
}
}
}
}
/*Record the results: the SAD, the SATD at the winning full-pel vector, and
the vector itself, doubled by the left shift before being stored.*/
embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
candx=best_vec[0];
candy=best_vec[1];
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride);
embs[_mbi].analysis_mv[0][_frame][0]=(signed char)(candx<<1);
embs[_mbi].analysis_mv[0][_frame][1]=(signed char)(candy<<1);
if(_frame==OC_FRAME_PREV){
/*Likewise record the SATD and vector of each individual block.*/
for(bi=0;bi<4;bi++){
candx=best_block_vec[bi][0];
candy=best_block_vec[bi][1];
embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
frag_buf_offs[fragis[bi]],candx,candy,src,ref,ystride);
embs[_mbi].block_mv[bi][0]=(signed char)(candx<<1);
embs[_mbi].block_mv[bi][1]=(signed char)(candy<<1);
}
}
}
/*Runs the motion search for one macro block against both the previous and
the golden reference frames.
The stored vector history of the macro block (analysis_mv[0..2]) is aged by
one entry, and the two searches then store their new vectors in
analysis_mv[0].
_enc: The encoder context.
_mbi: The macro block index.*/
void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
oc_mv2 *mvs;
int accum_p[2];
int accum_g[2];
mvs=_enc->mb_info[_mbi].analysis_mv;
/*The PREV accumulator is the newest PREV vector if the previous frame was
dropped, and zero otherwise.*/
if(_enc->prevframe_dropped){
accum_p[0]=mvs[0][OC_FRAME_PREV][0];
accum_p[1]=mvs[0][OC_FRAME_PREV][1];
}
else accum_p[1]=accum_p[0]=0;
/*The GOLD accumulator is the oldest stored GOLD vector.*/
accum_g[0]=mvs[2][OC_FRAME_GOLD][0];
accum_g[1]=mvs[2][OC_FRAME_GOLD][1];
mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0];
mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1];
/*Move the motion vector predictors back a frame.
Entries 0 and 1 become entries 1 and 2; entry 0 is overwritten by the
searches below.*/
memmove(mvs+1,mvs,2*sizeof(*mvs));
/*Search the last frame.*/
oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV);
mvs[2][OC_FRAME_PREV][0]=accum_p[0];
mvs[2][OC_FRAME_PREV][1]=accum_p[1];
/*GOLDEN MVs are different from PREV MVs in that they're each absolute
offsets from some frame in the past rather than relative offsets from the
frame before.
For predictor calculation to make sense, we need them to be in the same
form as PREV MVs.*/
mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0];
mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1];
mvs[2][OC_FRAME_GOLD][0]-=accum_g[0];
mvs[2][OC_FRAME_GOLD][1]-=accum_g[1];
/*Search the golden frame.*/
oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD);
/*Put GOLDEN MVs back into absolute offset form.
The newest MV is already an absolute offset.
The additions below undo the subtractions above in reverse order.*/
mvs[2][OC_FRAME_GOLD][0]+=accum_g[0];
mvs[2][OC_FRAME_GOLD][1]+=accum_g[1];
mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0];
mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1];
}
#if 0
/*Disabled SAD-based variant of oc_mcenc_ysatd_halfpel_mbrefine().
  Tests the eight half-pel positions around a full-pel macro block vector and
   keeps the best one.
  _enc:      The encoder context.
  _mbi:      The macro block index.
  _vec:      On entry, the full-pel vector; on exit, the refined vector in
              half-pel units.
  _best_err: The error of the full-pel vector.
  _frame:    The reference frame to search.
  Return: The error of the refined vector.*/
static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
 int _vec[2],int _best_err,int _frame){
  const unsigned char *src;
  const unsigned char *ref;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *fragis;
  int                  offset_y[9];
  int                  ystride;
  int                  mvoffset_base;
  int                  best_site;
  int                  sitei;
  int                  err;
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  /*This used to index with the undeclared name _framei, which would have
     broken the build as soon as this block was enabled.*/
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  ystride=_enc->state.ref_ystride[0];
  mvoffset_base=_vec[0]+_vec[1]*ystride;
  /*Row offsets for each site; entry 4 (the center) is never read.*/
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
  offset_y[3]=offset_y[5]=0;
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    int site;
    int xmask;
    int ymask;
    int dx;
    int dy;
    int mvoffset0;
    int mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  /*Convert to half-pel units and apply the winning offset.*/
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
  return _best_err;
}
#endif
/*Refines a full-pel macro block vector to half-pel precision using SATD.
  The eight half-pel positions around the full-pel vector are tested, and the
   best of them replaces it if it beats _best_err.
  _enc:      The encoder context.
  _mbi:      The macro block index.
  _vec:      On entry, the full-pel vector; on exit, the refined vector in
              half-pel units.
  _best_err: The error of the full-pel vector.
  _frame:    The reference frame to search.
  Return: The error of the refined vector.*/
static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
 int _mbi,int _vec[2],unsigned _best_err,int _frame){
  const unsigned char *src;
  const unsigned char *ref;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *fragis;
  int                  offset_y[9];
  int                  ystride;
  int                  mvoffset_base;
  int                  best_site;
  int                  sitei;
  /*Unsigned, like the value returned by oc_satd16_halfpel() and like
     _best_err, so that the comparison below does not mix signedness.*/
  unsigned             err;
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  ystride=_enc->state.ref_ystride[0];
  mvoffset_base=_vec[0]+_vec[1]*ystride;
  /*Row offsets for each site.
    The center entry is not visited by the loop below, but is set anyway so
     that no element of the table is left indeterminate.*/
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
  offset_y[3]=offset_y[4]=offset_y[5]=0;
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
  /*Site 4 is the (0,0) offset: keep the full-pel position unless beaten.*/
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    int site;
    int xmask;
    int ymask;
    int dx;
    int dy;
    int mvoffset0;
    int mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
        oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
    err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  /*Convert to half-pel units and apply the winning offset.*/
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
  return _best_err;
}
/*Refines the newest stored macro block vector for one reference frame to
   half-pel precision, updating both the vector and its SATD in place.
  _enc:   The encoder context.
  _mbi:   The macro block index.
  _frame: The reference frame whose vector is refined.*/
void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
  oc_mb_enc_info *mb;
  int             mv[2];
  mb=_enc->mb_info+_mbi;
  /*Halve the stored vector before handing it to the refinement.*/
  mv[0]=OC_DIV2(mb->analysis_mv[0][_frame][0]);
  mv[1]=OC_DIV2(mb->analysis_mv[0][_frame][1]);
  mb->satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
   _mbi,mv,mb->satd[_frame],_frame);
  mb->analysis_mv[0][_frame][0]=(signed char)mv[0];
  mb->analysis_mv[0][_frame][1]=(signed char)mv[1];
}
#if 0
/*Disabled SAD-based variant of oc_mcenc_ysatd_halfpel_brefine().
  Tests the eight half-pel positions around a full-pel block vector and keeps
   the best one.
  _enc:      The encoder context.
  _vec:      On entry, the full-pel vector; on exit, the refined vector in
              half-pel units.
  _src:      The source block.
  _ref:      The co-located reference block.
  _ystride:  The stride of both buffers.
  _offset_y: The row offset for each search site.
  _best_err: The error of the full-pel vector.
  Return: The error of the refined vector.*/
static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
 int _offset_y[9],unsigned _best_err){
  int mvoffset_base;
  int best_site;
  int sitei;
  mvoffset_base=_vec[0]+_vec[1]*_ystride;
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    unsigned err;
    int      site;
    int      xmask;
    int      ymask;
    int      dx;
    int      dy;
    int      mvoffset0;
    int      mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
    /*The stride is the _ystride parameter; the undeclared name ystride used
       here before would have broken the build once this block was enabled.*/
    err=oc_enc_frag_sad2_thresh(_enc,_src,
     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  /*Convert to half-pel units and apply the winning offset.*/
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
  return _best_err;
}
#endif
/*Refines a full-pel block vector to half-pel precision using SATD.
  _enc:      The encoder context, forwarded to the SATD routine.
  _vec:      On entry, the full-pel vector; on exit, the refined vector in
              half-pel units.
  _src:      The source block.
  _ref:      The co-located reference block.
  _ystride:  The stride of both buffers.
  _offset_y: The row offset for each search site.
  _best_err: The error of the full-pel vector.
  Return: The error of the refined vector.*/
static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
 int _offset_y[9],unsigned _best_err){
  int base;
  int winner;
  int k;
  base=_vec[0]+_vec[1]*_ystride;
  /*Site 4 is the (0,0) offset: keep the full-pel position unless beaten.*/
  winner=4;
  for(k=0;k<8;k++){
    unsigned cost;
    int      s;
    int      sx;
    int      sy;
    int      mx;
    int      my;
    int      off0;
    int      off1;
    s=OC_SQUARE_SITES[0][k];
    sx=OC_SQUARE_DX[s];
    sy=OC_SQUARE_DY[s];
    /*Pick the two reference positions whose average gives the half-pel
       position, without multiplying.
      This SHOULD match what oc_state_get_mv_offsets() computes for the luma
       plane with the vector ((_vec[0]<<1)+sx,(_vec[1]<<1)+sy).*/
    mx=OC_SIGNMASK(((_vec[0]<<1)+sx)^sx);
    my=OC_SIGNMASK(((_vec[1]<<1)+sy)^sy);
    off0=base+(sx&mx)+(_offset_y[s]&my);
    off1=base+(sx&~mx)+(_offset_y[s]&~my);
    cost=oc_enc_frag_satd2_thresh(_enc,_src,
     _ref+off0,_ref+off1,_ystride,_best_err);
    if(cost<_best_err){
      _best_err=cost;
      winner=s;
    }
  }
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[winner];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[winner];
  return _best_err;
}
/*Refines the four per-block vectors of a macro block to half-pel precision
   against the previous frame.
  The refined SATD replaces block_satd[], and the refined vector is written to
   ref_mv[].
  _enc: The encoder context.
  _mbi: The macro block index.*/
void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
  oc_mb_enc_info      *mb;
  const ptrdiff_t     *offs_table;
  const ptrdiff_t     *blocks;
  const unsigned char *cur;
  const unsigned char *prev;
  int                  row_step[9];
  int                  stride;
  int                  k;
  stride=_enc->state.ref_ystride[0];
  offs_table=_enc->state.frag_buf_offs;
  blocks=_enc->state.mb_maps[_mbi][0];
  cur=_enc->state.ref_frame_data[OC_FRAME_IO];
  prev=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
  /*Row offsets for the three rows of search sites.
    The center entry (4) is left unset; the refinement never visits it.*/
  row_step[0]=-stride;
  row_step[1]=-stride;
  row_step[2]=-stride;
  row_step[3]=0;
  row_step[5]=0;
  row_step[6]=stride;
  row_step[7]=stride;
  row_step[8]=stride;
  mb=_enc->mb_info+_mbi;
  for(k=0;k<4;k++){
    ptrdiff_t offs;
    int       mv[2];
    offs=offs_table[blocks[k]];
    /*Halve the stored vector before handing it to the refinement.*/
    mv[0]=OC_DIV2(mb->block_mv[k][0]);
    mv[1]=OC_DIV2(mb->block_mv[k][1]);
    mb->block_satd[k]=oc_mcenc_ysatd_halfpel_brefine(_enc,mv,
     cur+offs,prev+offs,stride,row_step,mb->block_satd[k]);
    mb->ref_mv[k][0]=(signed char)mv[0];
    mb->ref_mv[k][1]=(signed char)mv[1];
  }
}

View File

@ -1,947 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mcomp.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include "encoder_internal.h"
/* Initialises motion compensation: fills in the half pixel offset tables
   and the table of search sites (8 per step, with the step size starting at
   ((MAX_MV_EXTENT/2)+1)/2 and halving until it reaches zero). */
void InitMotionCompensation ( CP_INSTANCE *cpi ){
  int Stride = (ogg_int32_t)cpi->pb.YStride;
  int Radius = ((MAX_MV_EXTENT/2)+1)/2;
  int Site = 0;
  int k;

  /* Half pixel correction tables: a 3x3 neighbourhood in raster order,
     with the centre at index 4. */
  for ( k=0; k<9; k++ ) {
    int dx = (k%3) - 1;
    int dy = (k/3) - 1;

    cpi->HalfPixelRef2Offset[k] = dy*Stride + dx;
    cpi->HalfPixelXOffset[k] = dx;
    cpi->HalfPixelYOffset[k] = dy;
  }

  /* No search stages yet. */
  cpi->MVSearchSteps = 0;

  /* One stage per radius; each stage visits the 8 neighbours of the centre
     in raster order. */
  for ( ; Radius>0; Radius /= 2 ) {
    int dy;

    cpi->MVSearchSteps += 1;

    for ( dy=-1; dy<=1; dy++ ) {
      int dx;

      for ( dx=-1; dx<=1; dx++ ) {
        if ( dx==0 && dy==0 ) continue;
        cpi->MVOffsetX[Site] = dx*Radius;
        cpi->MVOffsetY[Site] = dy*Radius;
        Site++;
      }
    }
  }

  /* Turn each (x,y) search offset into a pixel index offset. */
  for ( k=0; k<Site; k++ )
    cpi->MVPixelOffsetY[k] = (cpi->MVOffsetY[k]*Stride) + cpi->MVOffsetX[k];
}
/* Returns a variance-style mismatch metric between a source block and its
   prediction: 64 times the sum of squared differences minus the square of
   the sum of differences.
   When the two reference pointers differ, the prediction is the truncated
   average of the two references; otherwise it is RefDataPtr1 alone.
   The reference rows are STRIDE_EXTRA+PixelsPerLine apart. */
static ogg_uint32_t GetInterErr (unsigned char * NewDataPtr,
                                 unsigned char * RefDataPtr1,
                                 unsigned char * RefDataPtr2,
                                 ogg_uint32_t PixelsPerLine ) {
  ogg_uint32_t Row;
  int          Col;
  ogg_int32_t  Sum=0;
  ogg_int32_t  SumSq=0;
  ogg_int32_t  Delta;
  ogg_int32_t  RefGap = abs((int)(RefDataPtr1 - RefDataPtr2));

  if ( RefGap != 0 ) {
    /* Simple two reference interpolation */
    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ ) {
      for ( Col=0; Col<8; Col++ ) {
        Delta = ((int)NewDataPtr[Col]) -
          (((int)RefDataPtr1[Col] + (int)RefDataPtr2[Col]) / 2);
        Sum += Delta;
        SumSq += Delta*Delta;
      }

      /* Step to next row of block. */
      NewDataPtr += PixelsPerLine;
      RefDataPtr1 += STRIDE_EXTRA+PixelsPerLine;
      RefDataPtr2 += STRIDE_EXTRA+PixelsPerLine;
    }
  }else{
    /* Both references coincide: no interpolation needed. */
    for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ ) {
      for ( Col=0; Col<8; Col++ ) {
        Delta = ((int)NewDataPtr[Col]) - (int)RefDataPtr1[Col];
        Sum += Delta;
        SumSq += Delta*Delta;
      }

      /* Step to next row of block. */
      NewDataPtr += PixelsPerLine;
      RefDataPtr1 += STRIDE_EXTRA + PixelsPerLine;
    }
  }

  /* Compute and return population variance as mis-match metric. */
  return (( (SumSq<<6) - Sum*Sum ));
}
/* Adds the sum of absolute differences between a source block and a
   reference block to ErrorSoFar and returns the total.
   Eight pixels are compared on each of BLOCK_HEIGHT_WIDTH rows; the
   reference rows are STRIDE_EXTRA+PixelsPerLine apart. */
static ogg_uint32_t GetSumAbsDiffs  (unsigned char * NewDataPtr,
                                     unsigned char * RefDataPtr,
                                     ogg_uint32_t PixelsPerLine,
                                     ogg_uint32_t ErrorSoFar) {
  ogg_uint32_t Total = ErrorSoFar;
  ogg_uint32_t Row;
  int          Col;

  for ( Row=0; Row < BLOCK_HEIGHT_WIDTH; Row++ ) {
    for ( Col=0; Col<8; Col++ )
      Total += abs( ((int)NewDataPtr[Col]) - ((int)RefDataPtr[Col]) );

    /* Step to next row of block. */
    NewDataPtr += PixelsPerLine;
    RefDataPtr += STRIDE_EXTRA+PixelsPerLine;
  }

  return Total;
}
/* Like GetSumAbsDiffs(), but gives up after any row that pushes the running
   total above BestSoFar.
   The value returned in that case is the partial total, which is already
   greater than BestSoFar. */
static ogg_uint32_t GetNextSumAbsDiffs (unsigned char * NewDataPtr,
                                        unsigned char * RefDataPtr,
                                        ogg_uint32_t PixelsPerLine,
                                        ogg_uint32_t ErrorSoFar,
                                        ogg_uint32_t BestSoFar ) {
  ogg_uint32_t Total = ErrorSoFar;
  ogg_uint32_t Row;
  int          Col;

  for ( Row=0; Row < BLOCK_HEIGHT_WIDTH; Row++ ) {
    for ( Col=0; Col<8; Col++ )
      Total += abs( ((int)NewDataPtr[Col]) - ((int)RefDataPtr[Col]) );

    /* The check runs once per completed row, not per pixel. */
    if ( Total > BestSoFar )
      return Total;

    /* Step to next row of block. */
    NewDataPtr += PixelsPerLine;
    RefDataPtr += STRIDE_EXTRA+PixelsPerLine;
  }

  return Total;
}
/* Adds the sum of absolute differences between a source block and a half
   pixel prediction to ErrorSoFar and returns the total.
   The prediction is the truncated average of the two references; when both
   reference pointers are equal the plain full pixel SAD is used instead.
   In the interpolated case the loop stops after any row that pushes the
   total above BestSoFar, returning the partial total.
   SrcData:       the source block (rows PixelsPerLine apart).
   RefDataPtr1/2: the two references (rows PixelsPerLine+STRIDE_EXTRA apart).
   ErrorSoFar:    error carried over from earlier blocks.
   BestSoFar:     the total to beat. */
static ogg_uint32_t GetHalfPixelSumAbsDiffs (unsigned char * SrcData,
                                             unsigned char * RefDataPtr1,
                                             unsigned char * RefDataPtr2,
                                             ogg_uint32_t PixelsPerLine,
                                             ogg_uint32_t ErrorSoFar,
                                             ogg_uint32_t BestSoFar ) {
  ogg_uint32_t i;
  int          j;
  ogg_uint32_t DiffVal = ErrorSoFar;
  ogg_int32_t  RefOffset = (int)(RefDataPtr1 - RefDataPtr2);
  ogg_uint32_t RefPixelsPerLine = PixelsPerLine + STRIDE_EXTRA;

  if ( RefOffset == 0 ) {
    /* Simple case as for non 0.5 pixel.
       GetSumAbsDiffs() already starts from ErrorSoFar, so its result is the
       new total; adding it to DiffVal would count ErrorSoFar twice. */
    DiffVal = GetSumAbsDiffs( SrcData, RefDataPtr1, PixelsPerLine,
                              ErrorSoFar);
  } else {
    for ( i=0; i < BLOCK_HEIGHT_WIDTH; i++ ) {
      for ( j=0; j<8; j++ ) {
        DiffVal += abs( ((int)SrcData[j]) - (((int)RefDataPtr1[j] +
                                              (int)RefDataPtr2[j]) / 2) );
      }

      /* No point going on once this site can no longer win. */
      if ( DiffVal > BestSoFar ) break;

      /* Step to next row of block. */
      SrcData += PixelsPerLine;
      RefDataPtr1 += RefPixelsPerLine;
      RefDataPtr2 += RefPixelsPerLine;
    }
  }

  return DiffVal;
}
/* Returns a variance-style metric for a block: 64 times the sum of squared
   pixel values minus the square of the sum of pixel values.
   Eight pixels are read on each of BLOCK_HEIGHT_WIDTH rows, PixelsPerLine
   apart. */
static ogg_uint32_t GetIntraError (unsigned char * DataPtr,
                                   ogg_uint32_t PixelsPerLine ) {
  ogg_uint32_t   Row;
  int            Col;
  ogg_uint32_t   Sum=0;
  ogg_uint32_t   SumSq=0;
  unsigned char *RowPtr = DataPtr;

  for ( Row=0; Row<BLOCK_HEIGHT_WIDTH; Row++ ) {
    for ( Col=0; Col<8; Col++ ) {
      Sum += RowPtr[Col];
      SumSq += RowPtr[Col]*RowPtr[Col];
    }

    /* Step to next row of block. */
    RowPtr += PixelsPerLine;
  }

  /* Compute population variance as mis-match metric. */
  return (( (SumSq<<6) - Sum*Sum ) );
}
/* Sums GetIntraError() over the four Y blocks of a macro block, skipping any
   block whose display_fragments entry is zero.
   The blocks are FragIndex, FragIndex+1, and the same pair one row of
   fragments (HFragments) further on. */
ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex,
                              ogg_uint32_t PixelsPerLine ) {
  ogg_uint32_t Frags[4];
  ogg_uint32_t Total = 0;
  int          k;

  Frags[0] = FragIndex;
  Frags[1] = Frags[0] + 1;
  Frags[2] = FragIndex + cpi->pb.HFragments;
  Frags[3] = Frags[2] + 1;

  /* Add together the intra errors for those blocks in the macro block
     that are coded (Y only) */
  for ( k=0; k<4; k++ ) {
    if ( !cpi->pb.display_fragments[Frags[k]] ) continue;
    Total +=
      GetIntraError(&cpi->
                    ConvDestBuffer[cpi->pb.pixel_index_table[Frags[k]]],
                    PixelsPerLine );
  }

  return Total;
}
/* GetMBInterError: inter (motion compensated) error score of a macro block
 * for a given motion vector, Y blocks only.
 *
 * cpi             - encoder instance (fragment flags, index tables, stride).
 * SrcPtr          - base of the source frame buffer.
 * RefPtr          - base of the reference frame buffer.
 * FragIndex       - index of the macro block's top left Y fragment.
 * LastXMV/LastYMV - motion vector; the code halves it for the whole pixel
 *                   displacement and uses the remainder to select a second,
 *                   neighbouring reference block.
 * PixelsPerLine   - stride handed to GetInterErr().
 *
 * Returns the sum of GetInterErr() over the Y fragments of the macro block
 * that are flagged in cpi->pb.display_fragments. */
ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi,
                              unsigned char * SrcPtr,
                              unsigned char * RefPtr,
                              ogg_uint32_t FragIndex,
                              ogg_int32_t LastXMV,
                              ogg_int32_t LastYMV,
                              ogg_uint32_t PixelsPerLine ) {
  ogg_uint32_t RefStride = cpi->pb.YStride;
  ogg_uint32_t BlockFrag[4];
  ogg_uint32_t Total = 0;
  ogg_int32_t SrcIndex;
  ogg_int32_t RefIndex;
  ogg_int32_t WholePelOffset;
  ogg_int32_t SecondRefOffset = 0;
  unsigned char *SrcBlock;
  unsigned char *RefBlock;
  int b;

  /* Displacement in the reference buffer for the whole pixel part of the
     vector. */
  WholePelOffset = ((LastYMV/2) * RefStride) + (LastXMV/2);

  /* An odd component moves the second reference block one pixel / one line
     further in the direction of the vector. */
  if ( LastXMV % 2 )
    SecondRefOffset += ( LastXMV > 0 ) ? 1 : -1;
  if ( LastYMV % 2 ) {
    if ( LastYMV > 0 )
      SecondRefOffset += RefStride;
    else
      SecondRefOffset -= RefStride;
  }

  /* Top pair, then the pair one fragment row below. */
  BlockFrag[0] = FragIndex;
  BlockFrag[1] = FragIndex + 1;
  BlockFrag[2] = FragIndex + cpi->pb.HFragments;
  BlockFrag[3] = BlockFrag[2] + 1;

  for ( b = 0; b < 4; b++ ) {
    /* Only blocks that are being coded contribute to the score. */
    if ( !cpi->pb.display_fragments[BlockFrag[b]] )
      continue;

    /* The indices are held as signed values so that adding a negative
       displacement indexes backwards from the fragment position. */
    SrcIndex = cpi->pb.pixel_index_table[BlockFrag[b]];
    RefIndex = cpi->pb.recon_pixel_index_table[BlockFrag[b]];

    SrcBlock = &SrcPtr[SrcIndex];
    RefBlock = &RefPtr[RefIndex + WholePelOffset];

    Total += GetInterErr( SrcBlock, RefBlock,
                          &RefBlock[SecondRefOffset], PixelsPerLine );
  }

  return Total;
}
/* GetMBMVInterError: stepped motion search for one macro block followed by
   a half pixel refinement.

   cpi           - encoder instance (fragment flags, index tables, search
                   step count and the offset tables used below).
   RefFramePtr   - base of the reference frame buffer.
   FragIndex     - index of the macro block's top left Y fragment.
   PixelsPerLine - stride passed to the sum-of-absolute-difference helpers.
   MVPixelOffset - table of reference buffer offsets, one per search site;
                   indexed in step with cpi->MVOffsetX / cpi->MVOffsetY.
   MV            - receives the chosen vector, in half pixel units.

   The search starts at the (0,0) vector. Each of the cpi->MVSearchSteps
   steps scores eight candidate sites and then re-centres on the best one
   found so far. Only Y blocks flagged in cpi->pb.display_fragments are
   scored. Returns the GetMBInterError() score for the final vector. */
ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi,
unsigned char * RefFramePtr,
ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine,
ogg_int32_t *MVPixelOffset,
MOTION_VECTOR *MV ) {
ogg_uint32_t Error = 0;
ogg_uint32_t MinError;
ogg_uint32_t InterMVError = 0;
ogg_int32_t i;
ogg_int32_t x=0, y=0;
ogg_int32_t step;
/* Running index into MVPixelOffset / MVOffsetX / MVOffsetY; it is never
   reset, so each step consumes the next eight table entries. */
ogg_int32_t SearchSite=0;
unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL};
unsigned char *RefPtr=NULL;
unsigned char *CandidateBlockPtr=NULL;
unsigned char *BestBlockPtr=NULL;
/* Offset from the top pair of blocks to the bottom pair (8 lines). */
ogg_uint32_t RefRow2Offset = cpi->pb.YStride * 8;
int MBlockDispFrags[4];
/* Half pixel variables */
/* NOTE(review): these are signed while MinError is unsigned; the two are
   mixed in the assignment and comparisons below. */
ogg_int32_t HalfPixelError;
ogg_int32_t BestHalfPixelError;
unsigned char BestHalfOffset;
unsigned char * RefDataPtr1;
unsigned char * RefDataPtr2;
/* Note which of the four blocks in the macro block are to be
included in the search. */
MBlockDispFrags[0] =
cpi->pb.display_fragments[FragIndex];
MBlockDispFrags[1] =
cpi->pb.display_fragments[FragIndex + 1];
MBlockDispFrags[2] =
cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments];
MBlockDispFrags[3] =
cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments + 1];
/* Set up the source pointers for the four source blocks. */
SrcPtr[0] = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
SrcPtr[1] = SrcPtr[0] + 8;
SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8);
SrcPtr[3] = SrcPtr[2] + 8;
/* Set starting reference point for search. */
RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
/* Check the 0,0 candidate. */
if ( MBlockDispFrags[0] ) {
Error = GetSumAbsDiffs( SrcPtr[0], RefPtr,
PixelsPerLine, Error);
}
if ( MBlockDispFrags[1] ) {
Error = GetSumAbsDiffs( SrcPtr[1], RefPtr + 8,
PixelsPerLine, Error);
}
if ( MBlockDispFrags[2] ) {
Error = GetSumAbsDiffs( SrcPtr[2], RefPtr + RefRow2Offset,
PixelsPerLine, Error);
}
if ( MBlockDispFrags[3] ) {
Error = GetSumAbsDiffs( SrcPtr[3], RefPtr + RefRow2Offset + 8,
PixelsPerLine, Error);
}
/* Set starting values to results of 0, 0 vector. */
MinError = Error;
BestBlockPtr = RefPtr;
x = 0;
y = 0;
MV->x = 0;
MV->y = 0;
/* Proceed through N-steps. */
for ( step=0; step<cpi->MVSearchSteps; step++ ) {
/* Search the 8-neighbours at distance pertinent to current step.*/
for ( i=0; i<8; i++ ) {
/* Set pointer to next candidate matching block. */
CandidateBlockPtr = RefPtr + MVPixelOffset[SearchSite];
/* Reset error */
Error = 0;
/* Get the score for the current offset */
if ( MBlockDispFrags[0] ) {
Error = GetSumAbsDiffs( SrcPtr[0], CandidateBlockPtr,
PixelsPerLine, Error);
}
/* The remaining blocks are only scored while the running total is still
   below the best score so far. */
if ( MBlockDispFrags[1] && (Error < MinError) ) {
Error = GetNextSumAbsDiffs( SrcPtr[1], CandidateBlockPtr + 8,
PixelsPerLine, Error, MinError );
}
if ( MBlockDispFrags[2] && (Error < MinError) ) {
Error = GetNextSumAbsDiffs( SrcPtr[2], CandidateBlockPtr + RefRow2Offset,
PixelsPerLine, Error, MinError );
}
if ( MBlockDispFrags[3] && (Error < MinError) ) {
Error = GetNextSumAbsDiffs( SrcPtr[3],
CandidateBlockPtr + RefRow2Offset + 8,
PixelsPerLine, Error, MinError );
}
if ( Error < MinError ) {
/* Remember best match. */
MinError = Error;
BestBlockPtr = CandidateBlockPtr;
/* Where is it. */
/* MV still holds the centre of this step; it is only updated once all
   eight neighbours have been scored. */
x = MV->x + cpi->MVOffsetX[SearchSite];
y = MV->y + cpi->MVOffsetY[SearchSite];
}
/* Move to next search location. */
SearchSite += 1;
}
/* Move to best location this step. */
RefPtr = BestBlockPtr;
MV->x = x;
MV->y = y;
}
/* Factor vectors to 1/2 pixel resolution. */
MV->x = (MV->x * 2);
MV->y = (MV->y * 2);
/* Now do the half pixel pass */
BestHalfOffset = 4; /* Default to the no offset case. */
BestHalfPixelError = MinError;
/* Get the half pixel error for each half pixel offset */
for ( i=0; i < 9; i++ ) {
HalfPixelError = 0;
if ( MBlockDispFrags[0] ) {
RefDataPtr1 = BestBlockPtr;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[0], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
if ( MBlockDispFrags[1] && (HalfPixelError < BestHalfPixelError) ) {
RefDataPtr1 = BestBlockPtr + 8;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[1], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) {
RefDataPtr1 = BestBlockPtr + RefRow2Offset;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[2], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) {
RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[3], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
/* Strictly-less comparison: on a tie the earlier offset (initially the
   no offset case) is kept. */
if ( HalfPixelError < BestHalfPixelError ) {
BestHalfOffset = (unsigned char)i;
BestHalfPixelError = HalfPixelError;
}
}
/* Half pixel adjust the MV */
MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
/* Get the error score for the chosen 1/2 pixel offset as a variance. */
InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr,
FragIndex, MV->x, MV->y, PixelsPerLine );
/* Return score of best matching block. */
return InterMVError;
}
/* GetMBMVExhaustiveSearch: exhaustive whole pixel motion search for one
   macro block followed by a half pixel refinement.

   cpi           - encoder instance (fragment flags, index tables, stride
                   and the half pixel offset tables).
   RefFramePtr   - base of the reference frame buffer.
   FragIndex     - index of the macro block's top left Y fragment.
   PixelsPerLine - stride passed to the sum-of-absolute-difference helpers.
   MV            - receives the chosen vector, in half pixel units.

   Scores every site of a MAX_MV_EXTENT x MAX_MV_EXTENT window whose top
   left corner lies MAX_MV_EXTENT/2 pixels up and left of the macro block.
   Only Y blocks flagged in cpi->pb.display_fragments are scored.
   Returns the GetMBInterError() score for the final vector. */
ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi,
unsigned char * RefFramePtr,
ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine,
MOTION_VECTOR *MV ) {
ogg_uint32_t Error = 0;
ogg_uint32_t MinError = HUGE_ERROR;
ogg_uint32_t InterMVError = 0;
ogg_int32_t i, j;
ogg_int32_t x=0, y=0;
unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL};
unsigned char *RefPtr;
unsigned char *CandidateBlockPtr=NULL;
/* NOTE(review): stays NULL if no candidate scores below HUGE_ERROR, yet it
   is used unconditionally by the half pixel pass below. */
unsigned char *BestBlockPtr=NULL;
/* Offset from the top pair of blocks to the bottom pair (8 lines). */
ogg_uint32_t RefRow2Offset = cpi->pb.YStride * 8;
int MBlockDispFrags[4];
/* Half pixel variables */
/* NOTE(review): these are signed while MinError is unsigned; the two are
   mixed in the assignment and comparisons below. */
ogg_int32_t HalfPixelError;
ogg_int32_t BestHalfPixelError;
unsigned char BestHalfOffset;
unsigned char * RefDataPtr1;
unsigned char * RefDataPtr2;
/* Note which of the four blocks in the macro block are to be
included in the search. */
MBlockDispFrags[0] = cpi->
pb.display_fragments[FragIndex];
MBlockDispFrags[1] = cpi->
pb.display_fragments[FragIndex + 1];
MBlockDispFrags[2] = cpi->
pb.display_fragments[FragIndex + cpi->pb.HFragments];
MBlockDispFrags[3] = cpi->
pb.display_fragments[FragIndex + cpi->pb.HFragments + 1];
/* Set up the source pointers for the four source blocks. */
SrcPtr[0] = &cpi->
ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
SrcPtr[1] = SrcPtr[0] + 8;
SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8);
SrcPtr[3] = SrcPtr[2] + 8;
/* Start at the top left corner of the search window. */
RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
RefPtr = RefPtr - ((MAX_MV_EXTENT/2) * cpi->
pb.YStride) - (MAX_MV_EXTENT/2);
/* Search each pixel aligned site */
for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) {
/* Starting position in row */
CandidateBlockPtr = RefPtr;
for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ) {
/* Reset error */
Error = 0;
/* Sum errors for each block. */
if ( MBlockDispFrags[0] ) {
Error = GetSumAbsDiffs( SrcPtr[0], CandidateBlockPtr,
PixelsPerLine, Error);
}
if ( MBlockDispFrags[1] ){
Error = GetSumAbsDiffs( SrcPtr[1], CandidateBlockPtr + 8,
PixelsPerLine, Error);
}
if ( MBlockDispFrags[2] ){
Error = GetSumAbsDiffs( SrcPtr[2], CandidateBlockPtr + RefRow2Offset,
PixelsPerLine, Error);
}
if ( MBlockDispFrags[3] ){
Error = GetSumAbsDiffs( SrcPtr[3], CandidateBlockPtr + RefRow2Offset + 8,
PixelsPerLine, Error);
}
/* Was this the best so far */
if ( Error < MinError ) {
MinError = Error;
BestBlockPtr = CandidateBlockPtr;
/* NOTE(review): this matches the window origin above only when
   MAX_MV_EXTENT - 16 equals MAX_MV_EXTENT/2 - confirm the value of
   MAX_MV_EXTENT. */
x = 16 + j - MAX_MV_EXTENT;
y = 16 + i - MAX_MV_EXTENT;
}
/* Move to the next site */
CandidateBlockPtr ++;
}
/* Move on to the next row. */
RefPtr += cpi->pb.YStride;
}
/* Factor vectors to 1/2 pixel resolution. */
MV->x = (x * 2);
MV->y = (y * 2);
/* Now do the half pixel pass */
BestHalfOffset = 4; /* Default to the no offset case. */
BestHalfPixelError = MinError;
/* Get the half pixel error for each half pixel offset */
for ( i=0; i < 9; i++ ) {
HalfPixelError = 0;
if ( MBlockDispFrags[0] ) {
RefDataPtr1 = BestBlockPtr;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[0], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
/* The remaining blocks are only scored while the running total is still
   below the best half pixel score so far. */
if ( MBlockDispFrags[1] && (HalfPixelError < BestHalfPixelError) ) {
RefDataPtr1 = BestBlockPtr + 8;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[1], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) {
RefDataPtr1 = BestBlockPtr + RefRow2Offset;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[2], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) {
RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8;
RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr[3], RefDataPtr1, RefDataPtr2,
PixelsPerLine, HalfPixelError, BestHalfPixelError );
}
if ( HalfPixelError < BestHalfPixelError ){
BestHalfOffset = (unsigned char)i;
BestHalfPixelError = HalfPixelError;
}
}
/* Half pixel adjust the MV */
MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
/* Get the error score for the chosen 1/2 pixel offset as a variance. */
InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr,
FragIndex, MV->x, MV->y, PixelsPerLine );
/* Return score of best matching block. */
return InterMVError;
}
/* GetBMVExhaustiveSearch: exhaustive whole pixel motion search for a single
   block followed by a half pixel refinement.

   cpi           - encoder instance (index tables, stride and the half pixel
                   offset tables).
   RefFramePtr   - base of the reference frame buffer.
   FragIndex     - index of the fragment (block) to search for.
   PixelsPerLine - stride passed to the error helpers.
   MV            - receives the chosen vector, in half pixel units.

   Scores every site of a MAX_MV_EXTENT x MAX_MV_EXTENT window whose top
   left corner lies MAX_MV_EXTENT/2 pixels up and left of the block.
   Unlike the macro block searches, the block is scored regardless of its
   display_fragments flag. Returns the GetInterErr() score at the chosen
   position. */
static ogg_uint32_t GetBMVExhaustiveSearch (CP_INSTANCE *cpi,
unsigned char * RefFramePtr,
ogg_uint32_t FragIndex,
ogg_uint32_t PixelsPerLine,
MOTION_VECTOR *MV ) {
ogg_uint32_t Error = 0;
ogg_uint32_t MinError = HUGE_ERROR;
ogg_uint32_t InterMVError = 0;
ogg_int32_t i, j;
ogg_int32_t x=0, y=0;
unsigned char *SrcPtr = NULL;
unsigned char *RefPtr;
unsigned char *CandidateBlockPtr=NULL;
/* NOTE(review): stays NULL if no candidate scores below HUGE_ERROR, yet it
   is used unconditionally by the half pixel pass below. */
unsigned char *BestBlockPtr=NULL;
/* Half pixel variables */
/* NOTE(review): these are signed while MinError is unsigned; the two are
   mixed in the assignment and comparisons below. */
ogg_int32_t HalfPixelError;
ogg_int32_t BestHalfPixelError;
unsigned char BestHalfOffset;
unsigned char * RefDataPtr2;
/* Set up the source pointer for the block. */
SrcPtr = &cpi->
ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
/* Start at the top left corner of the search window. */
RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
RefPtr = RefPtr - ((MAX_MV_EXTENT/2) *
cpi->pb.YStride) - (MAX_MV_EXTENT/2);
/* Search each pixel aligned site */
for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) {
/* Starting position in row */
CandidateBlockPtr = RefPtr;
for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ){
/* Get the block error score. */
Error = GetSumAbsDiffs( SrcPtr, CandidateBlockPtr,
PixelsPerLine, 0);
/* Was this the best so far */
if ( Error < MinError ) {
MinError = Error;
BestBlockPtr = CandidateBlockPtr;
/* NOTE(review): this matches the window origin above only when
   MAX_MV_EXTENT - 16 equals MAX_MV_EXTENT/2 - confirm the value of
   MAX_MV_EXTENT. */
x = 16 + j - MAX_MV_EXTENT;
y = 16 + i - MAX_MV_EXTENT;
}
/* Move to the next site */
CandidateBlockPtr ++;
}
/* Move on to the next row. */
RefPtr += cpi->pb.YStride;
}
/* Factor vectors to 1/2 pixel resolution. */
MV->x = (x * 2);
MV->y = (y * 2);
/* Now do the half pixel pass */
BestHalfOffset = 4; /* Default to the no offset case. */
BestHalfPixelError = MinError;
/* Get the half pixel error for each half pixel offset */
for ( i=0; i < 9; i++ ) {
RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[i];
HalfPixelError =
GetHalfPixelSumAbsDiffs( SrcPtr, BestBlockPtr, RefDataPtr2,
PixelsPerLine, 0, BestHalfPixelError );
/* Strictly-less comparison: on a tie the earlier offset (initially the
   no offset case) is kept. */
if ( HalfPixelError < BestHalfPixelError ){
BestHalfOffset = (unsigned char)i;
BestHalfPixelError = HalfPixelError;
}
}
/* Half pixel adjust the MV */
MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
/* Get the variance score at the chosen offset */
RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[BestHalfOffset];
InterMVError =
GetInterErr( SrcPtr, BestBlockPtr, RefDataPtr2, PixelsPerLine );
/* Return score of best matching block. */
return InterMVError;
}
/* GetFOURMVExhaustiveSearch: four-vector (one per Y block) exhaustive
 * motion search for a macro block.
 *
 * cpi           - encoder instance.
 * RefFramePtr   - base of the reference frame buffer.
 * FragIndex     - index of the macro block's top left Y fragment.
 * PixelsPerLine - stride passed through to the per-block search.
 * MV            - array of at least four vectors; MV[0..3] receive the
 *                 vectors for the top left, top right, bottom left and
 *                 bottom right blocks respectively.
 *
 * The 4MV mode is only considered valid when all four Y blocks are flagged
 * in cpi->pb.display_fragments; otherwise HUGE_ERROR is returned and MV is
 * left untouched. When valid, returns the sum of the four per-block
 * GetBMVExhaustiveSearch() scores. */
ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi,
                                        unsigned char * RefFramePtr,
                                        ogg_uint32_t FragIndex,
                                        ogg_uint32_t PixelsPerLine,
                                        MOTION_VECTOR *MV ) {
  ogg_uint32_t LowerRow = FragIndex + cpi->pb.HFragments;
  ogg_uint32_t Total;

  /* Bail out unless every Y block of the macro block is being updated. */
  if ( !cpi->pb.display_fragments[FragIndex] ||
       !cpi->pb.display_fragments[FragIndex + 1] ||
       !cpi->pb.display_fragments[LowerRow] ||
       !cpi->pb.display_fragments[LowerRow + 1] )
    return HUGE_ERROR;

  /* Search each block independently and accumulate the scores. */
  Total  = GetBMVExhaustiveSearch(cpi, RefFramePtr, FragIndex,
                                  PixelsPerLine, &(MV[0]) );
  Total += GetBMVExhaustiveSearch(cpi, RefFramePtr, (FragIndex + 1),
                                  PixelsPerLine, &(MV[1]) );
  Total += GetBMVExhaustiveSearch(cpi, RefFramePtr, LowerRow,
                                  PixelsPerLine, &(MV[2]) );
  Total += GetBMVExhaustiveSearch(cpi, RefFramePtr, (LowerRow + 1),
                                  PixelsPerLine, &(MV[3]) );

  return Total;
}

View File

@ -1,337 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: misc_common.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <string.h>
#include "encoder_internal.h"
#include "block_inline.h"
#define FIXED_Q 150
#define MAX_UP_REG_LOOPS 2
/* Gives the initial bytes per block estimate for each Q value.
   One entry per Q table index (Q_TABLE_SIZE entries). GetEstimatedBpb()
   uses this table for frames that are not BASE_FRAME and scales the entry
   by cpi->BpbCorrectionFactor. */
double BpbTable[Q_TABLE_SIZE] = {
0.42, 0.45, 0.46, 0.49, 0.51, 0.53, 0.56, 0.58,
0.61, 0.64, 0.68, 0.71, 0.74, 0.77, 0.80, 0.84,
0.89, 0.92, 0.98, 1.01, 1.04, 1.13, 1.17, 1.23,
1.28, 1.34, 1.41, 1.45, 1.51, 1.59, 1.69, 1.80,
1.84, 1.94, 2.02, 2.15, 2.23, 2.34, 2.44, 2.50,
2.69, 2.80, 2.87, 3.04, 3.16, 3.29, 3.59, 3.66,
3.86, 3.94, 4.22, 4.50, 4.64, 4.70, 5.24, 5.34,
5.61, 5.87, 6.11, 6.41, 6.71, 6.99, 7.36, 7.69
};
/* Bytes per block estimate for each Q table index, used by
   GetEstimatedBpb() when the frame type is BASE_FRAME. Unlike BpbTable,
   the entry is returned without applying cpi->BpbCorrectionFactor. */
double KfBpbTable[Q_TABLE_SIZE] = {
0.74, 0.81, 0.88, 0.94, 1.00, 1.06, 1.14, 1.19,
1.27, 1.34, 1.42, 1.49, 1.54, 1.59, 1.66, 1.73,
1.80, 1.87, 1.97, 2.01, 2.08, 2.21, 2.25, 2.36,
2.39, 2.50, 2.55, 2.65, 2.71, 2.82, 2.95, 3.01,
3.11, 3.19, 3.31, 3.42, 3.58, 3.66, 3.78, 3.89,
4.11, 4.26, 4.36, 4.39, 4.63, 4.76, 4.85, 5.04,
5.26, 5.29, 5.47, 5.64, 5.76, 6.05, 6.35, 6.67,
6.91, 7.17, 7.40, 7.56, 8.02, 8.45, 8.86, 9.38
};
/* GetEstimatedBpb: predicted bytes per block at a given quality value.
 *
 * cpi     - encoder instance; supplies the Q threshold table, the frame
 *           type and the running correction factor.
 * TargetQ - quality value to look up.
 *
 * The table index is that of the first cpi->pb.QThreshTable entry which
 * TargetQ is greater than or equal to, or the last index if there is none.
 * BASE_FRAME frames return the KfBpbTable entry as is; all other frames
 * return the BpbTable entry scaled by cpi->BpbCorrectionFactor. */
double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ ){
  ogg_int32_t QIdx = Q_TABLE_SIZE - 1;
  ogg_uint32_t n = 0;

  /* Locate the first threshold that TargetQ reaches. */
  while ( n < Q_TABLE_SIZE ) {
    if ( TargetQ >= cpi->pb.QThreshTable[n] ) {
      QIdx = n;
      break;
    }
    n++;
  }

  /* Key frames use their own table and no correction. */
  if ( GetFrameType(&cpi->pb) == BASE_FRAME )
    return KfBpbTable[QIdx];

  return BpbTable[QIdx] * cpi->BpbCorrectionFactor;
}
static void UpRegulateMB( CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
ogg_uint32_t SB, ogg_uint32_t MB, int NoCheck ) {
ogg_int32_t FragIndex;
ogg_uint32_t B;
/* Variables used in calculating corresponding row,col and index in
UV planes */
ogg_uint32_t UVRow;
ogg_uint32_t UVColumn;
ogg_uint32_t UVFragOffset;
/* There may be MB's lying out of frame which must be ignored. For
these MB's Top left block will have a negative Fragment Index. */
if ( QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB ) >= 0 ) {
/* Up regulate the component blocks Y then UV. */
for ( B=0; B<4; B++ ){
FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
if ( ( !cpi->pb.display_fragments[FragIndex] ) &&
( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ){
cpi->pb.display_fragments[FragIndex] = 1;
cpi->extra_fragments[FragIndex] = 1;
cpi->FragmentLastQ[FragIndex] = RegulationQ;
cpi->MotionScore++;
}
}
/* Check the two UV blocks */
FragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB );
UVRow = (FragIndex / (cpi->pb.HFragments * 2));
UVColumn = (FragIndex % cpi->pb.HFragments) / 2;
UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
FragIndex = cpi->pb.YPlaneFragments + UVFragOffset;
if ( ( !cpi->pb.display_fragments[FragIndex] ) &&
( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ) {
cpi->pb.display_fragments[FragIndex] = 1;
cpi->extra_fragments[FragIndex] = 1;
cpi->FragmentLastQ[FragIndex] = RegulationQ;
cpi->MotionScore++;
}
FragIndex += cpi->pb.UVPlaneFragments;
if ( ( !cpi->pb.display_fragments[FragIndex] ) &&
( (NoCheck) || (cpi->FragmentLastQ[FragIndex] > RegulationQ) ) ) {
cpi->pb.display_fragments[FragIndex] = 1;
cpi->extra_fragments[FragIndex] = 1;
cpi->FragmentLastQ[FragIndex] = RegulationQ;
cpi->MotionScore++;
}
}
}
/* UpRegulateBlocks: flags additional blocks for coding until
   cpi->MotionScore reaches RecoveryBlocks.

   cpi            - encoder instance.
   RegulationQ    - Q value handed to UpRegulateMB(); with NoCheck == 0 only
                    fragments last coded at a Q above this are flagged.
   RecoveryBlocks - target value for cpi->MotionScore.
   LastSB, LastMB - in/out scan cursor (super block index and macro block
                    index within it); the scan starts here and the cursor
                    is written back so a later call can carry on.

   The image is scanned at most MAX_UP_REG_LOOPS times. */
static void UpRegulateBlocks (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
ogg_int32_t RecoveryBlocks,
ogg_uint32_t * LastSB, ogg_uint32_t * LastMB ) {
ogg_uint32_t LoopTimesRound = 0;
ogg_uint32_t MaxSB = cpi->pb.YSBRows *
cpi->pb.YSBCols; /* Tot super blocks in image */
ogg_uint32_t SB, MB; /* Super-Block and macro block indices. */
/* First scan for blocks for which a residue update is outstanding. */
while ( (cpi->MotionScore < RecoveryBlocks) &&
(LoopTimesRound < MAX_UP_REG_LOOPS) ) {
LoopTimesRound++;
for ( SB = (*LastSB); SB < MaxSB; SB++ ) {
/* Check its four Macro-Blocks */
/* *LastMB is advanced in lock step with MB below and wraps to 0 after
   the fourth macro block, so the next super block starts from 0. */
for ( MB=(*LastMB); MB<4; MB++ ) {
/* Mark relevant blocks for update */
UpRegulateMB( cpi, RegulationQ, SB, MB, 0 );
/* Keep track of the last refresh MB. */
(*LastMB) += 1;
if ( (*LastMB) == 4 )
(*LastMB) = 0;
/* Termination clause */
if (cpi->MotionScore >= RecoveryBlocks) {
/* Make sure we don't stall at SB level */
if ( *LastMB == 0 )
SB++;
break;
}
}
/* Termination clause */
if (cpi->MotionScore >= RecoveryBlocks)
break;
}
/* Update super block start index */
/* Wrap to the first super block once the scan has run off the end. */
if ( SB >= MaxSB){
(*LastSB) = 0;
}else{
(*LastSB) = SB;
}
}
}
/* UpRegulateDataStream: flags extra blocks for coding so that
   cpi->MotionScore approaches RecoveryBlocks.

   cpi            - encoder instance.
   RegulationQ    - Q value recorded against every block that gets flagged.
   RecoveryBlocks - target value for cpi->MotionScore (reduced by 3 below
                    when it is greater than 3).

   First calls UpRegulateBlocks(), resuming at cpi->LastEndSB. If the target
   is still not met and RegulationQ equals VERY_BEST_Q, a final pass from
   cpi->FinalPassLastPos flags blocks without checking their last coded Q. */
void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
ogg_int32_t RecoveryBlocks ) {
ogg_uint32_t LastPassMBPos = 0;
ogg_uint32_t StdLastMBPos = 0;
ogg_uint32_t MaxSB = cpi->pb.YSBRows *
cpi->pb.YSBCols; /* Tot super blocks in image */
ogg_uint32_t SB=0; /* Super-Block index */
ogg_uint32_t MB; /* Macro-Block index */
/* Deduct the number of blocks in an MB / 2 from the recover block count.
This will compensate for the fact that once we start checking an MB
we test every block in that macro block */
if ( RecoveryBlocks > 3 )
RecoveryBlocks -= 3;
/* Up regulate blocks last coded at higher Q */
/* The macro block cursor (StdLastMBPos) is local, so every call starts at
   macro block 0 of super block cpi->LastEndSB. */
UpRegulateBlocks( cpi, RegulationQ, RecoveryBlocks,
&cpi->LastEndSB, &StdLastMBPos );
/* If we have still not used up the minimum number of blocks and are
at the minimum Q then run through a final pass of the data to
ensure that each block gets a final refresh. */
if ( (RegulationQ == VERY_BEST_Q) &&
(cpi->MotionScore < RecoveryBlocks) ) {
if ( cpi->FinalPassLastPos < MaxSB ) {
for ( SB = cpi->FinalPassLastPos; SB < MaxSB; SB++ ) {
/* Check its four Macro-Blocks */
for ( MB=LastPassMBPos; MB<4; MB++ ) {
/* Mark relevant blocks for update */
UpRegulateMB( cpi, RegulationQ, SB, MB, 1 );
/* Keep track of the last refresh MB. */
LastPassMBPos += 1;
if ( LastPassMBPos == 4 ) {
LastPassMBPos = 0;
/* Increment SB index */
/* Only advanced once all four macro blocks of a super block are done. */
cpi->FinalPassLastPos += 1;
}
/* Termination clause */
if (cpi->MotionScore >= RecoveryBlocks)
break;
}
/* Termination clause */
if (cpi->MotionScore >= RecoveryBlocks)
break;
}
}
}
}
/* RegulateQ: chooses the quality value for the current frame.
 *
 * cpi         - encoder instance; cpi->pb.ThisFrameQualityValue is set and,
 *               when it differs from the previous frame's value, the
 *               quantiser is re-initialised through UpdateQC().
 * UpdateScore - number of units the frame's byte budget
 *               (cpi->ThisFrameTargetBytes) is spread over.
 *
 * The Q table is walked until the predicted bytes per unit first exceeds
 * the target; whichever of that entry and the one before it lies closer to
 * the target is selected. The result is then range limited for key frames,
 * capped at the configured ActiveMaxQ entry and finally overridden by
 * cpi->FixedQ when that is non-zero. */
void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore ) {
  double TargetUnitScoreBytes = (double)cpi->ThisFrameTargetBytes /
    (double)UpdateScore;
  double PredUnitScoreBytes;
  double LastBitError = 10000.0;    /* Silly high number */
  ogg_uint32_t QIndex = Q_TABLE_SIZE - 1;
  ogg_uint32_t i;

  /* Search for the best Q for the target bitrate. */
  for ( i = 0; i < Q_TABLE_SIZE; i++ ) {
    PredUnitScoreBytes = GetEstimatedBpb( cpi, cpi->pb.QThreshTable[i] );
    if ( PredUnitScoreBytes > TargetUnitScoreBytes ) {
      /* Pick the closer of this entry and the previous one. The very first
         entry has no predecessor: i is unsigned, so i - 1 would wrap and
         later be used as an out of range table index. */
      if ( (i == 0) ||
           ((PredUnitScoreBytes - TargetUnitScoreBytes) <= LastBitError) ) {
        QIndex = i;
      } else {
        QIndex = i - 1;
      }
      break;
    } else {
      /* Remember by how much this entry undershoots the target. */
      LastBitError = TargetUnitScoreBytes - PredUnitScoreBytes;
    }
  }

  /* QIndex should now indicate the optimal Q. */
  cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[QIndex];

  /* Apply range restrictions for key frames. */
  if ( GetFrameType(&cpi->pb) == BASE_FRAME ) {
    if ( cpi->pb.ThisFrameQualityValue > cpi->pb.QThreshTable[20] )
      cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[20];
    else if ( cpi->pb.ThisFrameQualityValue < cpi->pb.QThreshTable[50] )
      cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[50];
  }

  /* Limit the Q value to the maximum available value */
  if (cpi->pb.ThisFrameQualityValue >
      cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) {
    cpi->pb.ThisFrameQualityValue =
      (ogg_uint32_t)cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ];
  }

  /* A fixed Q overrides everything above, for every frame type. (The key
     frame branch used to store QThreshTable[43] first, but that value was
     overwritten on the very next line.) */
  if(cpi->FixedQ) {
    cpi->pb.ThisFrameQualityValue = cpi->FixedQ;
  }

  /* If the quantiser value has changed then re-initialise it */
  if ( cpi->pb.ThisFrameQualityValue != cpi->pb.LastFrameQualityValue ) {
    /* Initialise quality tables. */
    UpdateQC( cpi, cpi->pb.ThisFrameQualityValue );
    cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
  }
}
/* CopyBackExtraFrags: for every fragment flagged in cpi->extra_fragments,
 * copies its pixels from cpi->yuv1ptr into cpi->ConvDestBuffer.
 *
 * All Y plane fragments are visited first, followed by the fragments of
 * the two chroma planes. Each fragment is VFRAGPIXELS rows of HFRAGPIXELS
 * bytes; rows are cpi->pb.info.width bytes apart in the Y plane and half
 * of that in the chroma planes. */
void CopyBackExtraFrags(CP_INSTANCE *cpi){
  ogg_uint32_t Frag;
  ogg_uint32_t Row;
  ogg_uint32_t Stride;
  ogg_uint32_t Start;
  unsigned char *From;
  unsigned char *To;

  for ( Frag = 0;
        Frag < (cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments));
        Frag++ ) {
    /* We are only interested in updated fragments. */
    if ( !cpi->extra_fragments[Frag] )
      continue;

    /* Y fragments use the full line width, U and V fragments half of it. */
    if ( Frag < cpi->pb.YPlaneFragments )
      Stride = cpi->pb.info.width;
    else
      Stride = cpi->pb.info.width / 2;

    /* Both buffers are addressed with the same pixel index. */
    Start = cpi->pb.pixel_index_table[Frag];
    From = &cpi->yuv1ptr[Start];
    To = &cpi->ConvDestBuffer[Start];

    for ( Row = 0; Row < VFRAGPIXELS; Row++ ) {
      memcpy( To, From, HFRAGPIXELS);
      From += Stride;
      To += Stride;
    }
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,128 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
/*Some common macros for potential platform-specific optimization.*/
#include <math.h>
#if !defined(_ocintrin_H)
# define _ocintrin_H (1)
/*Some specific platforms may have optimized intrinsic or inline assembly
versions of these functions which can substantially improve performance.
We define macros for them to allow easy incorporation of these non-ANSI
features.
Being macros, most of them evaluate their arguments more than once, so
arguments must be free of side effects.*/
/*Note that we do not provide a macro for abs(), because it is provided as a
library function, which we assume is translated into an intrinsic to avoid
the function call overhead and then implemented in the smartest way for the
target platform.
With modern gcc (4.x), this is true: it uses cmov instructions if the
architecture supports it and branchless bit-twiddling if it does not (the
speed difference between the two approaches is not measurable).
Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
by Sun Microsystems, despite prior art dating back to at least 1996:
http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
On gcc 3.x, however, our assumption is not true, as abs() is translated to a
conditional jump, which is horrible on deeply pipelined architectures (e.g.,
all consumer architectures for the past decade or more).
Also be warned that -C*abs(x) where C is a constant is mis-optimized as
abs(C*x) on every gcc release before 4.2.3.
See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */
/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
given an appropriate architecture, but the branchless bit-twiddling versions
are just as fast, and do not require any special target architecture.
Earlier gcc versions (3.x) compiled both versions to the same assembly
instructions, because of the way they represented ((_b)>(_a)) internally.
The comparison yields 0 or 1, so its negation is a mask of all zeros or all
ones that selects whether the difference is applied.*/
#define OC_MAXI(_a,_b) ((_a)-((_a)-(_b)&-((_b)>(_a))))
#define OC_MINI(_a,_b) ((_a)+((_b)-(_a)&-((_b)<(_a))))
/*Clamps an integer into the given range.
If _a>_c, then the lower bound _a is respected over the upper bound _c (this
behavior is required to meet our documented API behavior).
_a: The lower bound.
_b: The value to clamp.
_c: The upper bound.*/
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
/*Clamps an integer to the range 0...255 and converts it to unsigned char.*/
#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
/*This has a chance of compiling branchless, and is just as fast as the
bit-twiddling method, which is slightly less portable, since it relies on a
sign-extended rightshift, which is not guaranteed by ANSI (but present on
every relevant platform).*/
#define OC_SIGNI(_a) (((_a)>0)-((_a)<0))
/*Slightly more portable than relying on a sign-extended right-shift (which is
not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
compile it into the right-shift anyway.*/
#define OC_SIGNMASK(_a) (-((_a)<0))
/*Divides an integer by a power of two, truncating towards 0.
_dividend: The integer to divide.
_shift: The non-negative power of two to divide by.
_rmask: (1<<_shift)-1*/
#define OC_DIV_POW2(_dividend,_shift,_rmask)\
((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift))
/*Divides _x by 65536, truncating towards 0.*/
#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
/*Divides _x by 2, truncating towards 0.*/
#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
/*Divides _x by 8, truncating towards 0.*/
#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
/*Divides _x by 16, truncating towards 0.*/
#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
negative dividends first.
When _rval is (1<<_shift-1), this is equivalent to division with rounding
ties away from zero.*/
#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift))
/*Divides _x by 2, rounding towards even numbers.*/
#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1)
/*Divides _x by (1<<(_shift)), rounding towards even numbers.*/
#define OC_DIV_POW2_RE(_x,_shift) \
((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift))
/*Swaps two integers _a and _b if _a>_b.
Both arguments must be modifiable lvalues; the temporary is an int.*/
#define OC_SORT2I(_a,_b) \
do{ \
int t__; \
t__=((_a)^(_b))&-((_b)<(_a)); \
(_a)^=t__; \
(_b)^=t__; \
} \
while(0)
/*Accesses one of four (signed) bytes given an index.
This can be used to avoid small lookup tables.
_a is selected by index 0 and _d by index 3.*/
#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \
((signed char) \
(((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8))
/*Accesses one of eight (unsigned) nibbles given an index.
This can be used to avoid small lookup tables.
_a is selected by index 0 and _h by index 7.*/
#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \
((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \
((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF)
/*All of these macros should expect floats as arguments.
  Arguments may be evaluated more than once, so they must be free of side
   effects.*/
#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a))
#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a))
/*Clamps a float into the given range.
  The argument order and the handling of _a>_c (the lower bound wins) match
   OC_CLAMPI.
  The previous definition, OC_MINF(_a,OC_MAXF(_b,_c)), did not clamp at all:
   for example it mapped (0,5,10) to 0 instead of 5.
  _a: The lower bound.
  _b: The value to clamp.
  _c: The upper bound.*/
#define OC_CLAMPF(_a,_b,_c) (OC_MAXF(_a,OC_MINF(_b,_c)))
/*Single precision wrappers around the double precision <math.h> functions.*/
#define OC_FABSF(_f) ((float)fabs(_f))
#define OC_SQRTF(_f) ((float)sqrt(_f))
#define OC_POWF(_b,_e) ((float)pow(_b,_e))
#define OC_LOGF(_f) ((float)log(_f))
#define OC_IFLOORF(_f) ((int)floor(_f))
#define OC_ICEILF(_f) ((int)ceil(_f))
#endif

View File

@ -1,117 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: pb.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include "encoder_internal.h"
/* Releases every scratch buffer owned by the playback instance and resets
   the matching pointers to 0, so the instance can be cleared or
   re-initialised again afterwards.  Buffers that were never allocated
   (pointer already 0) are skipped. */
void ClearTmpBuffers(PB_INSTANCE * pbi){
  /* Release phase: same order as the allocations in InitTmpBuffers(). */
  if(pbi->ReconDataBuffer){
    _ogg_free(pbi->ReconDataBuffer);
  }
  if(pbi->DequantBuffer){
    _ogg_free(pbi->DequantBuffer);
  }
  if(pbi->TmpDataBuffer){
    _ogg_free(pbi->TmpDataBuffer);
  }
  if(pbi->TmpReconBuffer){
    _ogg_free(pbi->TmpReconBuffer);
  }
  if(pbi->dequant_Y_coeffs){
    _ogg_free(pbi->dequant_Y_coeffs);
  }
  if(pbi->dequant_UV_coeffs){
    _ogg_free(pbi->dequant_UV_coeffs);
  }
  if(pbi->dequant_Inter_coeffs){
    _ogg_free(pbi->dequant_Inter_coeffs);
  }
  if(pbi->dequant_InterUV_coeffs){
    _ogg_free(pbi->dequant_InterUV_coeffs);
  }

  /* Reset phase: no pointer may keep referring to freed memory. */
  pbi->ReconDataBuffer = 0;
  pbi->DequantBuffer = 0;
  pbi->TmpDataBuffer = 0;
  pbi->TmpReconBuffer = 0;
  pbi->dequant_Y_coeffs = 0;
  pbi->dequant_UV_coeffs = 0;
  pbi->dequant_InterUV_coeffs = 0;
  pbi->dequant_Inter_coeffs = 0;
}
/* (Re)allocates the scratch buffers of a playback instance.  Anything
   allocated earlier is released first; each buffer then gets room for 64
   elements of its own element type.  Allocation results are not checked
   here. */
void InitTmpBuffers(PB_INSTANCE * pbi){
  /* Number of elements held by every temporary buffer. */
  enum { TmpBufferEntries = 64 };

  /* Drop whatever was allocated before. */
  ClearTmpBuffers(pbi);

  /* Working buffers. */
  pbi->ReconDataBuffer =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->ReconDataBuffer));
  pbi->DequantBuffer =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->DequantBuffer));
  pbi->TmpDataBuffer =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->TmpDataBuffer));
  pbi->TmpReconBuffer =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->TmpReconBuffer));

  /* Dequantisation coefficient tables. */
  pbi->dequant_Y_coeffs =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->dequant_Y_coeffs));
  pbi->dequant_UV_coeffs =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->dequant_UV_coeffs));
  pbi->dequant_Inter_coeffs =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->dequant_Inter_coeffs));
  pbi->dequant_InterUV_coeffs =
    _ogg_malloc(TmpBufferEntries*sizeof(*pbi->dequant_InterUV_coeffs));
}
/* Releases everything owned by a playback instance: the temporary buffers
   (via ClearTmpBuffers) and the bit-packer buffer.  A null instance pointer
   is accepted and ignored.  The opb pointer is reset after it is freed so
   that clearing the same instance twice does not free the buffer again. */
void ClearPBInstance(PB_INSTANCE *pbi){
  if(!pbi)
    return;
  ClearTmpBuffers(pbi);
  if (pbi->opb) {
    _ogg_free(pbi->opb);
    /* Do not leave a dangling pointer behind. */
    pbi->opb = 0;
  }
}
/* Brings a playback instance into its initial state: the structure is
   zero-filled, the temporary buffers and the bit-packer buffer are
   allocated, and the few members whose initial value is set explicitly are
   assigned. */
void InitPBInstance(PB_INSTANCE *pbi){
  int table;

  /* Start from an all-zero structure. */
  memset(pbi, 0, sizeof(*pbi));
  InitTmpBuffers(pbi);

  /* Storage for the oggpack_buffer; libogg2 reports the size at run time. */
#ifdef LIBOGG2
  pbi->opb = _ogg_malloc(oggpack_buffersize());
#else
  pbi->opb = _ogg_malloc(sizeof(oggpack_buffer));
#endif

  /* Each modifier pointer refers to entry 255 of its table. */
  for(table = 0; table < 4; table++)
    pbi->ModifierPointer[table] = &pbi->Modifier[table][255];

  pbi->DecoderErrorCode = 0;
  pbi->KeyFrameType = DCT_KEY_FRAME;
  pbi->FramesHaveBeenSkipped = 0;
}

View File

@ -1,951 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: pp.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include "encoder_internal.h"
#include "pp.h"
/* Larger / smaller of two values.  Every use of an argument is parenthesised
   so that operands containing lower-precedence operators (|, &, ?:, ...)
   expand correctly.  The selected argument is evaluated twice, so do not
   pass expressions with side effects. */
#define MAX(a, b) (((a)>(b))?(a):(b))
#define MIN(a, b) (((a)<(b))?(a):(b))
#define PP_QUALITY_THRESH 49
/* Weight substituted by DeringBlockStrong()/DeringBlockWeak() when a
   neighbour difference is so large that the computed modifier drops below
   -64.  Looked up with the fragment's quality index (FragQIndex). */
static ogg_int32_t SharpenModifier[ Q_TABLE_SIZE ] =
{ -12, -11, -10, -10, -9, -9, -9, -9,
-6, -6, -6, -6, -6, -6, -6, -6,
-4, -4, -4, -4, -4, -4, -4, -4,
-2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
/* Quantiser scale per quality index.  DeblockPlane() passes this table to
   the deblocking filters, which derive their step size (QStep) from it. */
static ogg_uint32_t DcQuantScaleV1[ Q_TABLE_SIZE ] = {
22, 20, 19, 18, 17, 17, 16, 16,
15, 15, 14, 14, 13, 13, 12, 12,
11, 11, 10, 10, 9, 9, 9, 8,
8, 8, 7, 7, 7, 6, 6, 6,
6, 5, 5, 5, 5, 4, 4, 4,
4, 4, 3, 3, 3, 3, 3, 3,
3, 2, 2, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1
};
/* The de-ringing filter (DeringFrame) uses the same table under its own
   name; this is an alias, not a copy. */
static ogg_uint32_t *DeringModifierV1=DcQuantScaleV1;
/* Frees all per-frame arrays of a pre-processor instance and leaves every
   corresponding pointer set to 0.  Pointers that are already 0 are not
   passed to _ogg_free(). */
static void PClearFrameInfo(PP_INSTANCE * ppi){
  int frame;

  if(ppi->ScanPixelIndexTable){
    _ogg_free(ppi->ScanPixelIndexTable);
  }
  ppi->ScanPixelIndexTable = 0;

  if(ppi->ScanDisplayFragments){
    _ogg_free(ppi->ScanDisplayFragments);
  }
  ppi->ScanDisplayFragments = 0;

  /* One fragment map per remembered frame. */
  for(frame = 0; frame < MAX_PREV_FRAMES; frame++){
    if(!ppi->PrevFragments[frame])
      continue;
    _ogg_free(ppi->PrevFragments[frame]);
    ppi->PrevFragments[frame] = 0;
  }

  if(ppi->FragScores){
    _ogg_free(ppi->FragScores);
  }
  ppi->FragScores = 0;

  if(ppi->SameGreyDirPixels){
    _ogg_free(ppi->SameGreyDirPixels);
  }
  ppi->SameGreyDirPixels = 0;

  if(ppi->FragDiffPixels){
    _ogg_free(ppi->FragDiffPixels);
  }
  ppi->FragDiffPixels = 0;

  if(ppi->BarBlockMap){
    _ogg_free(ppi->BarBlockMap);
  }
  ppi->BarBlockMap = 0;

  if(ppi->TmpCodedMap){
    _ogg_free(ppi->TmpCodedMap);
  }
  ppi->TmpCodedMap = 0;

  if(ppi->RowChangedPixels){
    _ogg_free(ppi->RowChangedPixels);
  }
  ppi->RowChangedPixels = 0;

  if(ppi->PixelScores){
    _ogg_free(ppi->PixelScores);
  }
  ppi->PixelScores = 0;

  if(ppi->PixelChangedMap){
    _ogg_free(ppi->PixelChangedMap);
  }
  ppi->PixelChangedMap = 0;

  if(ppi->ChLocals){
    _ogg_free(ppi->ChLocals);
  }
  ppi->ChLocals = 0;

  if(ppi->yuv_differences){
    _ogg_free(ppi->yuv_differences);
  }
  ppi->yuv_differences = 0;
}
/* (Re)allocates the per-frame arrays of a pre-processor instance.
   Existing arrays are released first.  Sizes are derived from the fragment
   counts (ScanFrameFragments, ScanHFragments) and the frame dimensions in
   ScanConfig, which must already be set.  Every array is sized with the
   element type of the pointer it is assigned to.  Allocation results are not
   checked here. */
void PInitFrameInfo(PP_INSTANCE * ppi){
  int i;
  PClearFrameInfo(ppi);

  /* One entry per fragment. */
  ppi->ScanPixelIndexTable =
    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanPixelIndexTable));
  ppi->ScanDisplayFragments =
    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanDisplayFragments));
  /* PrevFragments is an array of buffers: size each buffer by its element
     type, not by the size of the pointer stored in the array. */
  for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
    ppi->PrevFragments[i] =
      _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->PrevFragments[i]));
  ppi->FragScores =
    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores));
  ppi->SameGreyDirPixels =
    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->SameGreyDirPixels));
  /* Sized by its own element type rather than that of FragScores. */
  ppi->FragDiffPixels =
    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragDiffPixels));

  /* Per fragment column. */
  ppi->BarBlockMap=
    _ogg_malloc(3 * ppi->ScanHFragments*sizeof(*ppi->BarBlockMap));
  ppi->TmpCodedMap =
    _ogg_malloc(ppi->ScanHFragments*sizeof(*ppi->TmpCodedMap));

  /* Per pixel row / per pixel column buffers. */
  ppi->RowChangedPixels =
    _ogg_malloc(3 * ppi->ScanConfig.VideoFrameHeight*
                sizeof(*ppi->RowChangedPixels));
  ppi->PixelScores =
    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
                sizeof(*ppi->PixelScores) * PSCORE_CB_ROWS);
  ppi->PixelChangedMap =
    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
                sizeof(*ppi->PixelChangedMap) * PMAP_CB_ROWS);
  ppi->ChLocals =
    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
                sizeof(*ppi->ChLocals) * CHLOCALS_CB_ROWS);
  ppi->yuv_differences =
    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
                sizeof(*ppi->yuv_differences) * YDIFF_CB_ROWS);
}
/* Public teardown entry point for a pre-processor instance: releases all
   per-frame arrays it owns. */
void ClearPPInstance(PP_INSTANCE *ppi)
{
  PClearFrameInfo(ppi);
}
/* Zero-fills a pre-processor instance and installs the default tuning
   values.  No memory is allocated here. */
void InitPPInstance(PP_INSTANCE *ppi){
  memset(ppi,0,sizeof(*ppi));

  /* Number of frames considered, counting the current one (so 1 would mean
     "no effect").  Must not exceed MAX_PREV_FRAMES. */
  ppi->PrevFrameLimit = 3;

  /* Fragment geometry. */
  ppi->HFragPixels = 8;
  ppi->VFragPixels = 8;

  /* Scan control thresholds and levels. */
  ppi->SRFGreyThresh = 4;
  ppi->SRFColThresh = 5;
  ppi->NoiseSupLevel = 3;
  ppi->SgcLevelThresh = 3;
  ppi->SuvcLevelThresh = 4;

  /* S.A.D. break-out limits. */
  ppi->GrpLowSadThresh = 10;
  ppi->GrpHighSadThresh = 64;

  /* Block thresholds (defaults for 8x8 blocks) and chroma corrections. */
  ppi->PrimaryBlockThreshold = 5;
  ppi->SgcThresh = 16;
  ppi->UVBlockThreshCorrection = 1.25;
  ppi->UVSgcCorrection = 1.5;

  ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
}
/* Strong de-ringing filter for one 8x8 block.
   SrcPtr/DstPtr point at the top-left pixel of the block in the source and
   destination planes, Pitch is the distance between rows, and
   QuantScale[FragQIndex] supplies the quantiser value that controls the
   filter strength.
   Each output pixel is a weighted average of the pixel and its four
   neighbours.  The weight given to a neighbour shrinks as the difference
   across that edge grows, is limited to at most min(3*QValue,32), and is
   replaced by SharpenModifier[FragQIndex] when the difference is very
   large.  The weights always add up to 128.
   Results are written straight to DstPtr; when DstPtr == SrcPtr, pixels to
   the right of and below an already processed pixel therefore see its
   filtered value. */
static void DeringBlockStrong(unsigned char *SrcPtr,
                              unsigned char *DstPtr,
                              ogg_int32_t Pitch,
                              ogg_uint32_t FragQIndex,
                              ogg_uint32_t *QuantScale){
  /* VertWeight: 9 rows x 8 columns, weight across the edge between a pixel
     and the one above it.  HorzWeight: 8 rows x 9 columns, weight across the
     edge between a pixel and the one to its left. */
  ogg_int16_t VertWeight[72];
  ogg_int16_t HorzWeight[72];
  const unsigned int QValue = QuantScale[FragQIndex];
  const int Sharpen = SharpenModifier[FragQIndex];
  const int Floor = 0;
  /* Row pointers; Centre starts one pixel to the left of the block so that
     indices never become negative. */
  const unsigned char *Centre = SrcPtr - 1;
  const unsigned char *Above = SrcPtr-Pitch;
  const unsigned char *Below = SrcPtr+Pitch;
  const unsigned char *Scan;
  unsigned int row, col;
  unsigned int offset = 0;
  int Ceiling;
  int weight;

  Ceiling = 3 * QValue;
  if(Ceiling > 32)
    Ceiling = 32;

  /* Weights for the horizontal edges (row above the block .. bottom row). */
  Scan = SrcPtr-Pitch;
  for(row = 0; row < 9; row++, Scan += Pitch){
    for(col = 0; col < 8; col++){
      weight = 32 + QValue - (abs(Scan[col+Pitch]-Scan[col]));
      if(weight < -64)
        weight = Sharpen;
      else if(weight < Floor)
        weight = Floor;
      else if(weight > Ceiling)
        weight = Ceiling;
      VertWeight[row*8+col] = (ogg_int16_t)weight;
    }
  }

  /* Weights for the vertical edges (column left of the block .. right). */
  Scan = SrcPtr-1;
  for(row = 0; row < 8; row++, Scan += Pitch){
    for(col = 0; col < 9; col++){
      weight = 32 + QValue - (abs(Scan[col+1]-Scan[col]));
      if(weight < -64)
        weight = Sharpen;
      else if(weight < 0)
        weight = Floor;
      else if(weight > Ceiling)
        weight = Ceiling;
      HorzWeight[row*9+col] = (ogg_int16_t)weight;
    }
  }

  /* Blend every pixel with its four neighbours. */
  for(row = 0; row < 8; row++, offset += Pitch){
    for(col = 0; col < 8; col++){
      const int wLeft = HorzWeight[row*9+col];
      const int wUp = VertWeight[row*8+col];
      const int wDown = VertWeight[(row+1)*8+col];
      const int wRight = HorzWeight[row*9+col+1];
      /* Whatever is not given to the neighbours stays with the pixel. */
      const int wSelf = 128 - wLeft - wUp - wDown - wRight;
      int blend = (1<<6);               /* rounding term for the >>7 below */
      int newVal;

      blend += wLeft * Centre[ offset +col];
      blend += wUp * Above[ offset +col];
      blend += wDown * Below[ offset +col];
      blend += wRight * Centre[ offset +col+2];
      newVal = ( wSelf * Centre[ offset +col+1] + blend) >> 7;
      DstPtr[ offset +col] = clamp255( newVal );
    }
  }
}
/* Weak de-ringing filter for one 8x8 block.
   Works like the strong variant, but the difference across an edge counts
   double when the neighbour weight is computed and the weight is limited to
   at most min(3*QValue,24), so less smoothing is applied.
   SrcPtr/DstPtr point at the top-left pixel of the block, Pitch is the row
   distance and QuantScale[FragQIndex] the controlling quantiser value.
   Results are written straight to DstPtr as they are produced. */
static void DeringBlockWeak(unsigned char *SrcPtr,
                            unsigned char *DstPtr,
                            ogg_int32_t Pitch,
                            ogg_uint32_t FragQIndex,
                            ogg_uint32_t *QuantScale){
  /* EdgeUD: 9 rows x 8 columns (pixel vs. pixel above).
     EdgeLR: 8 rows x 9 columns (pixel vs. pixel to the left). */
  ogg_int16_t EdgeUD[72];
  ogg_int16_t EdgeLR[72];
  const unsigned int QValue = QuantScale[FragQIndex];
  const int Sharpen = SharpenModifier[FragQIndex];
  const int Lowest = 0;
  const unsigned char *ThisRow = SrcPtr-1;   /* starts one pixel left */
  const unsigned char *PrevRow = SrcPtr-Pitch;
  const unsigned char *NextRow = SrcPtr+Pitch;
  const unsigned char *Walk;
  unsigned int y, x;
  unsigned int base = 0;
  int Highest;
  int mod;

  Highest = 3 * QValue;
  if(Highest > 24)
    Highest = 24;

  /* Up/down edge weights. */
  Walk = SrcPtr-Pitch;
  for(y = 0; y < 9; y++, Walk += Pitch){
    for(x = 0; x < 8; x++){
      mod = 32 + QValue - 2*(abs(Walk[x+Pitch]-Walk[x]));
      if(mod < -64)
        mod = Sharpen;
      else if(mod < Lowest)
        mod = Lowest;
      else if(mod > Highest)
        mod = Highest;
      EdgeUD[y*8+x] = (ogg_int16_t)mod;
    }
  }

  /* Left/right edge weights. */
  Walk = SrcPtr-1;
  for(y = 0; y < 8; y++, Walk += Pitch){
    for(x = 0; x < 9; x++){
      mod = 32 + QValue - 2*(abs(Walk[x+1]-Walk[x]));
      if(mod < -64)
        mod = Sharpen;
      else if(mod < Lowest)
        mod = Lowest;
      else if(mod > Highest)
        mod = Highest;
      EdgeLR[y*9+x] = (ogg_int16_t)mod;
    }
  }

  /* Weighted average of each pixel and its four neighbours; the five
     weights add up to 128. */
  for(y = 0; y < 8; y++, base += Pitch){
    for(x = 0; x < 8; x++){
      const int wl = EdgeLR[y*9+x];
      const int wu = EdgeUD[y*8+x];
      const int wd = EdgeUD[(y+1)*8+x];
      const int wr = EdgeLR[y*9+x+1];
      const int wc = 128 - wl - wu - wd - wr;
      int sum = (1<<6);                 /* rounding term for the >>7 below */
      int newVal;

      sum += wl * ThisRow[ base +x];
      sum += wu * PrevRow[ base +x];
      sum += wd * NextRow[ base +x];
      sum += wr * ThisRow[ base +x+2];
      newVal = ( wc * ThisRow[ base +x+1] + sum) >> 7;
      DstPtr[ base +x] = clamp255( newVal );
    }
  }
}
/* De-rings a whole frame, fragment by fragment, from Src into Dst.
   The filter for each fragment is chosen from the fragment's accumulated
   variance (pbi->FragmentVariances) and the post-processing level:
     - very busy fragments at level > 5 get the strong filter, possibly three
       times in a row,
     - variance above Thresh2 gets one strong pass,
     - variance above Thresh1 gets one weak pass,
     - everything else is copied unchanged.
   The luma plane is handled first, followed by the two chroma planes, which
   use half the fragment counts and half the line length.  The fragment
   counter keeps running across the three planes. */
static void DeringFrame(PB_INSTANCE *pbi,
                        unsigned char *Src, unsigned char *Dst){
  const ogg_int32_t Thresh1 = 384;
  const ogg_int32_t Thresh2 = 4 * Thresh1;
  const ogg_int32_t Thresh3 = 5 * Thresh2/4;
  const ogg_int32_t Thresh4 = 5 * Thresh2/2;
  ogg_uint32_t *QuantScale = DeringModifierV1;
  ogg_uint32_t BlocksAcross = pbi->HFragments;
  ogg_uint32_t BlocksDown = pbi->VFragments;
  ogg_uint32_t LineLength = pbi->YStride;
  ogg_uint32_t Block = 0;
  ogg_uint32_t col,row;
  unsigned char *SrcRow = Src + pbi->ReconYDataOffset;
  unsigned char *DstRow = Dst + pbi->ReconYDataOffset;
  int plane;

  /* Y plane. */
  for(row = 0; row < BlocksDown; row++){
    for(col = 0; col < BlocksAcross; col++, Block++){
      unsigned char *In = SrcRow + 8 * col;
      unsigned char *Out = DstRow + 8 * col;
      ogg_uint32_t Quality = pbi->FragQIndex[Block];
      ogg_int32_t Variance = pbi->FragmentVariances[Block];

      if( pbi->PostProcessingLevel >5 && Variance > Thresh3 ){
        int BusyNeighbour;
        DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
        /* Two more passes when any direct neighbour inside the plane is
           busier still. */
        BusyNeighbour =
          (col > 0 &&
           pbi->FragmentVariances[Block-1] > Thresh4 ) ||
          (col + 1 < BlocksAcross &&
           pbi->FragmentVariances[Block+1] > Thresh4 ) ||
          (row + 1 < BlocksDown &&
           pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
          (row > 0 &&
           pbi->FragmentVariances[Block-BlocksAcross] > Thresh4);
        if(BusyNeighbour){
          DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
          DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
        }
      } else if(Variance > Thresh2 ) {
        DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
      } else if(Variance > Thresh1 ) {
        DeringBlockWeak(In, Out, LineLength,Quality,QuantScale);
      } else {
        CopyBlock(In, Out, LineLength);
      }
    }
    SrcRow += 8 * LineLength;
    DstRow += 8 * LineLength;
  }

  /* Chroma planes: half as many fragments in each direction and half the
     line length. */
  BlocksAcross /= 2;
  BlocksDown /= 2;
  LineLength /= 2;

  /* plane 0 is U, plane 1 is V; both are processed identically. */
  for(plane = 0; plane < 2; plane++){
    if(plane == 0){
      SrcRow = Src + pbi->ReconUDataOffset;
      DstRow = Dst + pbi->ReconUDataOffset;
    }else{
      SrcRow = Src + pbi->ReconVDataOffset;
      DstRow = Dst + pbi->ReconVDataOffset;
    }
    for(row = 0; row < BlocksDown; row++){
      for(col = 0; col < BlocksAcross; col++, Block++){
        unsigned char *In = SrcRow + 8 * col;
        unsigned char *Out = DstRow + 8 * col;
        ogg_uint32_t Quality = pbi->FragQIndex[Block];
        ogg_int32_t Variance = pbi->FragmentVariances[Block];

        if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
          DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
          DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
          DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
        }else if(Variance > Thresh2 ){
          DeringBlockStrong(In, Out, LineLength,Quality,QuantScale);
        }else if(Variance > Thresh1 ){
          DeringBlockWeak(In, Out, LineLength,Quality,QuantScale);
        }else{
          CopyBlock(In, Out, LineLength);
        }
      }
      SrcRow += 8 * LineLength;
      DstRow += 8 * LineLength;
    }
  }
}
/* Stamps the current frame's quality index onto every fragment flagged in
   pbi->display_fragments.  Fragments that are not flagged keep the index
   they already had. */
void UpdateFragQIndex(PB_INSTANCE *pbi){
  const ogg_uint32_t FrameQ = pbi->FrameQIndex;
  ogg_uint32_t frag;

  for(frag = 0; frag < pbi->UnitFragments; frag++){
    if(!pbi->display_fragments[frag])
      continue;
    pbi->FragQIndex[frag] = FrameQ;
  }
}
/* Deblocks one band of a plane: the horizontal fragment edge that lies at
   SrcPtr/DesPtr and the vertical fragment edges of the fragment row directly
   above it.
   As called from DeblockPlane(), SrcPtr/DesPtr point at the first pixel of a
   fragment row (not the top one) and StartFrag is the index of the first
   fragment of the row above, so fragment CurrentFrag lies above the edge and
   fragment CurrentFrag+FragsAcross below it.
   PlaneLineStep is the distance between rows, FragsAcross the number of
   fragments per row and QuantScale the table that maps a fragment's quality
   index to the filter step size.
   Besides writing pixels to the destination, the function adds an activity
   measure to pbi->FragmentVariances for the fragments on both sides of every
   edge it visits. */
static void DeblockLoopFilteredBand(PB_INSTANCE *pbi,
unsigned char *SrcPtr,
unsigned char *DesPtr,
ogg_uint32_t PlaneLineStep,
ogg_uint32_t FragsAcross,
ogg_uint32_t StartFrag,
ogg_uint32_t *QuantScale){
ogg_uint32_t j,k;
ogg_uint32_t CurrentFrag=StartFrag;
ogg_int32_t QStep;
ogg_int32_t FLimit;
unsigned char *Src, *Des;
/* The ten pixels that straddle the edge being filtered. */
ogg_int32_t x[10];
ogg_int32_t Sum1, Sum2;
while(CurrentFrag < StartFrag + FragsAcross){
/* Horizontal edge: read a column of ten source pixels starting five rows
   above the edge, write eight destination pixels starting four rows above
   it. */
Src=SrcPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*5;
Des=DesPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*4;
/* The step size comes from the fragment below the edge. */
QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
FLimit = ( QStep * 3 ) >> 2;
/* One iteration per pixel column of the fragment. */
for( j=0; j<8 ; j++){
x[0] = Src[0];
x[1] = Src[PlaneLineStep];
x[2] = Src[PlaneLineStep*2];
x[3] = Src[PlaneLineStep*3];
x[4] = Src[PlaneLineStep*4];
x[5] = Src[PlaneLineStep*5];
x[6] = Src[PlaneLineStep*6];
x[7] = Src[PlaneLineStep*7];
x[8] = Src[PlaneLineStep*8];
x[9] = Src[PlaneLineStep*9];
/* Sum1/Sum2: total absolute step between neighbouring pixels on the upper
   (x[0..4]) and lower (x[5..9]) side of the edge. */
Sum1=Sum2=0;
for(k=1;k<=4;k++){
Sum1 += abs(x[k]-x[k-1]);
Sum2 += abs(x[k+4]-x[k+5]);
}
/* Accumulate the activity (capped at 255 per column) for both fragments. */
pbi->FragmentVariances[CurrentFrag] +=((Sum1>255)?255:Sum1);
pbi->FragmentVariances[CurrentFrag + FragsAcross] += ((Sum2>255)?255:Sum2);
/* Smooth only when both sides are flat enough and the jump across the edge
   itself is smaller than QStep in magnitude. */
if( Sum1 < FLimit &&
Sum2 < FLimit &&
(x[5] - x[4]) < QStep &&
(x[4] - x[5]) < QStep ){
/* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
/* Taps that would fall outside x[0..9] reuse x[0] or x[9]. */
Des[0 ] = (x[0] + x[0] +x[0] + x[1] * 2 +
x[2] + x[3] +x[4] + 4) >> 3;
Des[PlaneLineStep ] = (x[0] + x[0] +x[1] + x[2] * 2 +
x[3] + x[4] +x[5] + 4) >> 3;
Des[PlaneLineStep*2] = (x[0] + x[1] +x[2] + x[3] * 2 +
x[4] + x[5] +x[6] + 4) >> 3;
Des[PlaneLineStep*3] = (x[1] + x[2] +x[3] + x[4] * 2 +
x[5] + x[6] +x[7] + 4) >> 3;
Des[PlaneLineStep*4] = (x[2] + x[3] +x[4] + x[5] * 2 +
x[6] + x[7] +x[8] + 4) >> 3;
Des[PlaneLineStep*5] = (x[3] + x[4] +x[5] + x[6] * 2 +
x[7] + x[8] +x[9] + 4) >> 3;
Des[PlaneLineStep*6] = (x[4] + x[5] +x[6] + x[7] * 2 +
x[8] + x[9] +x[9] + 4) >> 3;
Des[PlaneLineStep*7] = (x[5] + x[6] +x[7] + x[8] * 2 +
x[9] + x[9] +x[9] + 4) >> 3;
}else {
/* copy the pixels to destination */
Des[0 ]= (unsigned char)x[1];
Des[PlaneLineStep ]= (unsigned char)x[2];
Des[PlaneLineStep*2]= (unsigned char)x[3];
Des[PlaneLineStep*3]= (unsigned char)x[4];
Des[PlaneLineStep*4]= (unsigned char)x[5];
Des[PlaneLineStep*5]= (unsigned char)x[6];
Des[PlaneLineStep*6]= (unsigned char)x[7];
Des[PlaneLineStep*7]= (unsigned char)x[8];
}
Src ++;
Des ++;
}
/* done with filtering the horizontal edge, now let's do the
vertical one */
/* skip the first one */
/* (the first fragment of the row has no left neighbour in the row) */
if(CurrentFrag==StartFrag)
CurrentFrag++;
else{
/* Vertical edge between fragments CurrentFrag-1 and CurrentFrag, in the
   eight rows above SrcPtr/DesPtr.  This pass reads from and writes to the
   destination buffer (Src is derived from Des), so it works in place on
   pixels that were already output; that is why there is no copy branch. */
Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
Src=Des-5;
Des-=4;
QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
FLimit = ( QStep * 3 ) >> 2;
/* One iteration per pixel row of the fragment. */
for( j=0; j<8 ; j++){
x[0] = Src[0];
x[1] = Src[1];
x[2] = Src[2];
x[3] = Src[3];
x[4] = Src[4];
x[5] = Src[5];
x[6] = Src[6];
x[7] = Src[7];
x[8] = Src[8];
x[9] = Src[9];
Sum1=Sum2=0;
for(k=1;k<=4;k++){
Sum1 += abs(x[k]-x[k-1]);
Sum2 += abs(x[k+4]-x[k+5]);
}
pbi->FragmentVariances[CurrentFrag-1] += ((Sum1>255)?255:Sum1);
pbi->FragmentVariances[CurrentFrag] += ((Sum2>255)?255:Sum2);
if( Sum1 < FLimit &&
Sum2 < FLimit &&
(x[5] - x[4]) < QStep &&
(x[4] - x[5]) < QStep ){
/* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
}
Src += PlaneLineStep;
Des += PlaneLineStep;
}
CurrentFrag ++;
}
}
}
/* Deblocks the vertical fragment edges inside one fragment row.
   SrcPtr/DesPtr point at the first pixel of the row in the source and
   destination planes and StartFrag is the index of its first fragment.  For
   each edge between fragment f and fragment f+1, every one of the eight
   pixel rows is examined: ten source pixels straddling the edge are read,
   the activity on both sides is added to pbi->FragmentVariances, and if both
   sides are flat and the jump across the edge is smaller than the step size
   the eight pixels around the edge are replaced in the destination by a
   7-tap (1 1 1 2 1 1 1) low-pass result.  Nothing is written otherwise. */
static void DeblockVerticalEdgesInLoopFilteredBand(PB_INSTANCE *pbi,
                                                   unsigned char *SrcPtr,
                                                   unsigned char *DesPtr,
                                                   ogg_uint32_t PlaneLineStep,
                                                   ogg_uint32_t FragsAcross,
                                                   ogg_uint32_t StartFrag,
                                                   ogg_uint32_t *QuantScale){
  /* Ten taps plus two padding entries on each side, so the low-pass kernel
     can run over the ends without special cases. */
  ogg_int32_t Padded[14];
  ogg_int32_t *const x = Padded + 2;
  ogg_uint32_t Frag;
  ogg_uint32_t line;
  int tap;

  for(Frag = StartFrag; Frag < StartFrag + FragsAcross-1; Frag++){
    /* Five pixels to the left of the edge for reading, four for writing. */
    unsigned char *Src = SrcPtr+8*(Frag-StartFrag+1)-5;
    unsigned char *Des = DesPtr+8*(Frag-StartFrag+1)-4;
    /* The step size comes from the fragment to the right of the edge. */
    const ogg_int32_t QStep = QuantScale[pbi->FragQIndex[Frag+1]];
    const ogg_int32_t FLimit = ( QStep * 3)>>2;

    for(line = 0; line < 8;
        line++, Src += PlaneLineStep, Des += PlaneLineStep){
      ogg_int32_t Sum1 = 0;
      ogg_int32_t Sum2 = 0;

      for(tap = 0; tap < 10; tap++)
        x[tap] = Src[tap];

      /* Activity on the left (x[0..4]) and right (x[5..9]) of the edge. */
      for(tap = 1; tap <= 4; tap++){
        Sum1 += abs(x[tap]-x[tap-1]);
        Sum2 += abs(x[tap+4]-x[tap+5]);
      }
      pbi->FragmentVariances[Frag] += ((Sum1>255)?255:Sum1);
      pbi->FragmentVariances[Frag+1] += ((Sum2>255)?255:Sum2);

      /* Leave the destination alone unless the edge qualifies. */
      if( Sum1 >= FLimit ||
          Sum2 >= FLimit ||
          (x[5] - x[4]) >= QStep ||
          (x[4] - x[5]) >= QStep )
        continue;

      /* low pass filtering (LPF7: 1 1 1 2 1 1 1); taps beyond either end
         repeat the outermost pixel. */
      x[-2] = x[-1] = x[0];
      x[10] = x[11] = x[9];
      for(tap = 0; tap < 8; tap++)
        Des[tap] = (x[tap-2] + x[tap-1] + x[tap] + x[tap+1] * 2 +
                    x[tap+2] + x[tap+3] + x[tap+4] + 4) >> 3;
    }
  }
}
/* Deblocks one plane of a frame from SourceBuffer into DestinationBuffer.
   Channel 0 selects the Y plane, 1 the U plane and any other value the V
   plane.  The top four rows are copied unchanged, every interior fragment
   row boundary is run through DeblockLoopFilteredBand(), and finally the
   bottom four rows are copied and the vertical edges of the last fragment
   row are filtered. */
static void DeblockPlane(PB_INSTANCE *pbi,
                         unsigned char *SourceBuffer,
                         unsigned char *DestinationBuffer,
                         ogg_uint32_t Channel ){
  ogg_uint32_t *QuantScale = DcQuantScaleV1;
  ogg_uint32_t Stride;
  ogg_uint32_t FragCols;
  ogg_uint32_t FragRows;
  ogg_uint32_t FirstFrag;
  ogg_uint32_t PlaneOffset;
  ogg_uint32_t line, band;
  unsigned char *In;
  unsigned char *Out;

  /* Plane geometry and starting positions. */
  if(Channel == 0){
    Stride = pbi->YStride;
    FragCols = pbi->HFragments;
    FragRows = pbi->VFragments;
    FirstFrag = 0;
    PlaneOffset = pbi->ReconYDataOffset;
  }else{
    /* Both chroma planes share stride and (halved) fragment counts. */
    Stride = pbi->UVStride;
    FragCols = pbi->HFragments / 2;
    FragRows = pbi->VFragments / 2;
    if(Channel == 1){
      FirstFrag = pbi->YPlaneFragments;
      PlaneOffset = pbi->ReconUDataOffset;
    }else{
      FirstFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments;
      PlaneOffset = pbi->ReconVDataOffset;
    }
  }
  In = & SourceBuffer[PlaneOffset];
  Out = & DestinationBuffer[PlaneOffset];

  /* The first four rows are above any filtered edge: plain copy. */
  for(line = 0; line < 4; line++)
    memcpy(Out+line*Stride, In+line*Stride, Stride);

  /* One call per boundary between two fragment rows. */
  for(band = 1; band < FragRows; band++){
    In += 8*Stride;
    Out += 8*Stride;
    /* Filter both the horizontal and vertical block edges inside the band */
    DeblockLoopFilteredBand(pbi, In, Out, Stride,
                            FragCols, FirstFrag, QuantScale);
    FirstFrag += FragCols;
  }

  /* The last band: copy its lower four rows, then filter its vertical
     edges. */
  for(line = 4; line < 8; line++)
    memcpy(Out+line*Stride, In+line*Stride, Stride);
  DeblockVerticalEdgesInLoopFilteredBand(pbi,In,Out,Stride,
                                         FragCols,FirstFrag,QuantScale);
}
/* Deblocks a complete frame from SourceBuffer into DestinationBuffer.
   The per-fragment variance accumulators are cleared first, the fragment
   quality indices are refreshed and the loop filter is set up; the three
   planes are then processed in the order Y, U, V. */
static void DeblockFrame(PB_INSTANCE *pbi, unsigned char *SourceBuffer,
                         unsigned char *DestinationBuffer){
  ogg_uint32_t Channel;

  memset(pbi->FragmentVariances, 0 , sizeof(ogg_int32_t) * pbi->UnitFragments);
  UpdateFragQIndex(pbi);
  SetupLoopFilter(pbi);

  /* 0 = Y, 1 = U, 2 = V */
  for(Channel = 0; Channel < 3; Channel++)
    DeblockPlane( pbi, SourceBuffer, DestinationBuffer, Channel);
}
/* Runs the post-processing chain selected by pbi->PostProcessingLevel on the
   last reconstructed frame, writing into pbi->PostProcessBuffer:
     0     nothing at all
     1     only refresh the per-fragment quality indices
     4, 8  deblocking only
     other deblocking, border update and de-ringing (this includes the
           levels 5 and 6) */
void PostProcess(PB_INSTANCE *pbi){
  switch (pbi->PostProcessingLevel){
  case 0:
    break;

  case 1:
    UpdateFragQIndex(pbi);
    break;

  case 4:
  case 8:
    DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
    break;

  case 5:
  case 6:
  default:
    DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
    UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
    /* De-ringing works in place on the deblocked frame. */
    DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
    break;
  }
}

View File

@ -1,48 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: pp.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
/* Constants. */
/* Dimensions, in pixels, of the blocks the pre-processor works on. */
#define INTERNAL_BLOCK_HEIGHT 8
#define INTERNAL_BLOCK_WIDTH 8
/* NEW Line search values. */
/* Direction codes. */
#define UP 0
#define DOWN 1
#define LEFT 2
#define RIGHT 3
/* Position of a row within the frame. */
#define FIRST_ROW 0
#define NOT_EDGE_ROW 1
#define LAST_ROW 2
/* Number of pixel rows held by the per-column buffers that PInitFrameInfo()
   allocates (yuv_differences, ChLocals, PixelChangedMap and PixelScores
   respectively): three or four block heights. */
#define YDIFF_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
#define CHLOCALS_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
#define PMAP_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 3)
#define PSCORE_CB_ROWS (INTERNAL_BLOCK_HEIGHT * 4)
/* Status values in block coding map */
/* NOTE(review): BLOCK_CODED_SGC and BLOCK_CODED_LOW share the value 4, so the
   two states cannot be told apart by value -- presumably intentional, confirm
   before changing either. */
#define CANDIDATE_BLOCK_LOW -2
#define CANDIDATE_BLOCK -1
#define BLOCK_NOT_CODED 0
#define BLOCK_CODED_BAR 3
#define BLOCK_CODED_SGC 4
#define BLOCK_CODED_LOW 4
#define BLOCK_CODED 5
/* Number of PrevFragments buffers kept per instance; PrevFrameLimit must not
   exceed it. */
#define MAX_PREV_FRAMES 16
/* Default installed into MaxLineSearchLen by InitPPInstance(). */
#define MAX_SEARCH_LINE_LEN 7

View File

@ -5,689 +5,115 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: quant.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
last mod: $Id$
********************************************************************/
#include <stdlib.h>
#include <string.h>
#include "encoder_internal.h"
#include "quant_lookup.h"
#include <ogg/ogg.h>
#include "quant.h"
#include "decint.h"
/* Built-in quantiser threshold table, one entry per quality index, with
   values falling from 500 to 10.  InitQTables() copies it into the
   instance's QThreshTable. */
static ogg_uint32_t QThreshTableV1[Q_TABLE_SIZE] = {
500, 450, 400, 370, 340, 310, 285, 265,
245, 225, 210, 195, 185, 180, 170, 160,
150, 145, 135, 130, 125, 115, 110, 107,
100, 96, 93, 89, 85, 82, 75, 74,
70, 68, 64, 60, 57, 56, 52, 50,
49, 45, 44, 43, 40, 38, 37, 35,
33, 32, 30, 29, 28, 25, 24, 22,
21, 19, 18, 17, 15, 13, 12, 10
};
/* Lower bounds for DC and AC quantiser values: {16,32} for DC and {8,16}
   for AC.
   NOTE(review): the meaning of the two indices is not visible in this part
   of the file -- presumably intra vs. inter coding; confirm at the use
   site. */
static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
/* Built-in DC scale factor table, one entry per quality index, with values
   falling from 220 to 10.  InitQTables() copies it into the instance's
   DcScaleFactorTable, and init_quantizer() uses it for both the luma and the
   chroma DC scale. */
static Q_LIST_ENTRY DcScaleFactorTableV1[ Q_TABLE_SIZE ] = {
220, 200, 190, 180, 170, 170, 160, 160,
150, 150, 140, 140, 130, 130, 120, 120,
110, 110, 100, 100, 90, 90, 90, 80,
80, 80, 70, 70, 70, 60, 60, 60,
60, 50, 50, 50, 50, 40, 40, 40,
40, 40, 30, 30, 30, 30, 30, 30,
30, 20, 20, 20, 20, 20, 20, 20,
20, 10, 10, 10, 10, 10, 10, 10
};
/* dbm -- defined some alternative tables to test header packing */
/* NEW_QTABLES is 0, so the "#else" branch below (the old VP3 values) is the
   one that gets compiled; set it to a non-zero value to build with the
   alternative tables instead.  Either way three 64-entry base matrices are
   defined: Y_coeffsV1, UV_coeffsV1 and Inter_coeffsV1. */
#define NEW_QTABLES 0
#if NEW_QTABLES
/* Alternative base matrix copied into Y_coeffs. */
static Q_LIST_ENTRY Y_coeffsV1[64] =
{
8, 16, 16, 16, 20, 20, 20, 20,
16, 16, 16, 16, 20, 20, 20, 20,
16, 16, 16, 16, 22, 22, 22, 22,
16, 16, 16, 16, 22, 22, 22, 22,
20, 20, 22, 22, 24, 24, 24, 24,
20, 20, 22, 22, 24, 24, 24, 24,
20, 20, 22, 22, 24, 24, 24, 24,
20, 20, 22, 22, 24, 24, 24, 24
};
/* Base matrix copied into UV_coeffs (same values as in the "#else"
   branch). */
static Q_LIST_ENTRY UV_coeffsV1[64] =
{ 17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
/* Different matrices for different encoder versions */
/* Alternative base matrix copied into Inter_coeffs. */
static Q_LIST_ENTRY Inter_coeffsV1[64] =
{
12, 16, 16, 16, 20, 20, 20, 20,
16, 16, 16, 16, 20, 20, 20, 20,
16, 16, 16, 16, 22, 22, 22, 22,
16, 16, 16, 16, 22, 22, 22, 22,
20, 20, 22, 22, 24, 24, 24, 24,
20, 20, 22, 22, 24, 24, 24, 24,
20, 20, 22, 22, 24, 24, 24, 24,
20, 20, 22, 22, 24, 24, 24, 24
};
#else /* these are the old VP3 values: */
/* Base matrix copied into Y_coeffs by InitQTables(). */
static Q_LIST_ENTRY Y_coeffsV1[64] ={
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 58, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
/* Base matrix copied into UV_coeffs by InitQTables(). */
static Q_LIST_ENTRY UV_coeffsV1[64] ={
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
/* Different matrices for different encoder versions */
/* Base matrix copied into Inter_coeffs by InitQTables(). */
static Q_LIST_ENTRY Inter_coeffsV1[64] ={
16, 16, 16, 20, 24, 28, 32, 40,
16, 16, 20, 24, 28, 32, 40, 48,
16, 20, 24, 28, 32, 40, 48, 64,
20, 24, 28, 32, 40, 48, 64, 64,
24, 28, 32, 40, 48, 64, 64, 64,
28, 32, 40, 48, 64, 64, 64, 96,
32, 40, 48, 64, 64, 64, 96, 128,
40, 48, 64, 64, 64, 96, 128, 128
};
#endif
/* Serialises the five quantisation tables of a playback instance into the
   bit packer, in a fixed order: the threshold table and the DC scale table
   with 16 bits per entry, then the Y, UV and inter coefficient matrices with
   8 bits per entry.  64 entries are written from each table. */
void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer* opb) {
  int entry;

  /* 16-bit tables. */
  for(entry = 0; entry < 64; entry++)
    oggpackB_write(opb, pbi->QThreshTable[entry],16);
  for(entry = 0; entry < 64; entry++)
    oggpackB_write(opb, pbi->DcScaleFactorTable[entry],16);

  /* 8-bit base matrices. */
  for(entry = 0; entry < 64; entry++)
    oggpackB_write(opb, pbi->Y_coeffs[entry],8);
  for(entry = 0; entry < 64; entry++)
    oggpackB_write(opb, pbi->UV_coeffs[entry],8);
  for(entry = 0; entry < 64; entry++)
    oggpackB_write(opb, pbi->Inter_coeffs[entry],8);
}
/* Reads the five quantisation tables from the bit packer into the codec
   setup info, in the order WriteQTables() emits them: the threshold table
   and the DC scale table (Q_TABLE_SIZE entries of 16 bits each), then the Y,
   UV and inter matrices (64 entries of 8 bits each).
   Returns 0 on success, or OC_BADHEADER as soon as a read yields a negative
   value; entries stored before that point stay stored. */
int ReadQTables(codec_setup_info *ci, oggpack_buffer* opb) {
  long value;
  int entry;

  /* 16-bit tables. */
  for(entry = 0; entry < Q_TABLE_SIZE; entry++) {
    theora_read(opb,16,&value);
    if(value < 0)
      return OC_BADHEADER;
    ci->QThreshTable[entry] = value;
  }
  for(entry = 0; entry < Q_TABLE_SIZE; entry++) {
    theora_read(opb,16,&value);
    if(value < 0)
      return OC_BADHEADER;
    ci->DcScaleFactorTable[entry] = (Q_LIST_ENTRY)value;
  }

  /* 8-bit base matrices. */
  for(entry = 0; entry < 64; entry++) {
    theora_read(opb,8,&value);
    if(value < 0)
      return OC_BADHEADER;
    ci->Y_coeffs[entry] = (Q_LIST_ENTRY)value;
  }
  for(entry = 0; entry < 64; entry++) {
    theora_read(opb,8,&value);
    if(value < 0)
      return OC_BADHEADER;
    ci->UV_coeffs[entry] = (Q_LIST_ENTRY)value;
  }
  for(entry = 0; entry < 64; entry++) {
    theora_read(opb,8,&value);
    if(value < 0)
      return OC_BADHEADER;
    ci->Inter_coeffs[entry] = (Q_LIST_ENTRY)value;
  }

  return 0;
}
/* Copies all five quantisation tables from the codec setup info into the
   playback instance.  The number of bytes copied is the size of the
   destination array in each case. */
void CopyQTables(PB_INSTANCE *pbi, codec_setup_info *ci) {
  /* Scale tables. */
  memcpy(pbi->QThreshTable, ci->QThreshTable,
         sizeof pbi->QThreshTable);
  memcpy(pbi->DcScaleFactorTable, ci->DcScaleFactorTable,
         sizeof pbi->DcScaleFactorTable);

  /* Base matrices. */
  memcpy(pbi->Y_coeffs, ci->Y_coeffs,
         sizeof pbi->Y_coeffs);
  memcpy(pbi->UV_coeffs, ci->UV_coeffs,
         sizeof pbi->UV_coeffs);
  memcpy(pbi->Inter_coeffs, ci->Inter_coeffs,
         sizeof pbi->Inter_coeffs);
}
/* Loads the built-in (VP31) quantisation tables into a playback instance.
   These are fixed defaults; adaptive or simply better tables could replace
   them some day.  The number of bytes copied is the size of the destination
   array in each case. */
void InitQTables( PB_INSTANCE *pbi ){
  /* Scale tables. */
  memcpy(pbi->QThreshTable, QThreshTableV1,
         sizeof pbi->QThreshTable);
  memcpy(pbi->DcScaleFactorTable, DcScaleFactorTableV1,
         sizeof pbi->DcScaleFactorTable);

  /* Base matrices. */
  memcpy(pbi->Y_coeffs, Y_coeffsV1,
         sizeof pbi->Y_coeffs);
  memcpy(pbi->UV_coeffs, UV_coeffsV1,
         sizeof pbi->UV_coeffs);
  memcpy(pbi->Inter_coeffs, Inter_coeffsV1,
         sizeof pbi->Inter_coeffs);
}
/* Fills pbi->quant_index with the inverse of dequant_index: for every
   position pos in the block, quant_index[dequant_index[pos]] becomes pos. */
static void BuildQuantIndex_Generic(PB_INSTANCE *pbi){
  ogg_int32_t pos;

  for(pos = 0; pos < BLOCK_SIZE; pos++){
    const ogg_int32_t target = dequant_index[pos];
    pbi->quant_index[target] = pos;
  }
}
/* Builds the forward (encoder side) quantizer tables held in cpi->pb.
   For each of the four cases Intra Y, Intra UV, Inter Y and Inter UV it
   fills three 64-entry tables:
     fp_quant_*_round   - rounding offset   (quantizer * RoundingFactor),
     fp_ZeroBinSize_*   - zero-bin size     (quantizer * ZBinFactor),
     fp_quant_*_coeffs  - reciprocal of the quantizer scaled by SHIFT16.
   Entry 0 (DC) is derived from DcScaleFactorTable[QIndex]; entries 1..63
   (AC) are derived from scale_factor.
   The base matrices are always the built-in *V1 tables, not the tables
   stored in pbi.
   RoundingFactor and ZBinFactor depend on cpi->pb.info.sharpness and on
   whether scale_factor is <= 50.
   On exit pbi->fquant_coeffs points at the Intra Y reciprocal table.
   NOTE(review): in this text the body of init_quantizer is interrupted by
   the comment and body of oc_dequant_tables_init (see the notes further
   down); this looks like a diff rendering that interleaves removed and
   added lines - confirm against the real file before relying on it. */
static void init_quantizer ( CP_INSTANCE *cpi,
ogg_uint32_t scale_factor,
unsigned char QIndex ){
int i;
double ZBinFactor;
double RoundingFactor;
double temp_fp_quant_coeffs;
double temp_fp_quant_round;
double temp_fp_ZeroBinSize;
PB_INSTANCE *pbi = &cpi->pb;
Q_LIST_ENTRY * Inter_coeffs;
Q_LIST_ENTRY * Y_coeffs;
Q_LIST_ENTRY * UV_coeffs;
Q_LIST_ENTRY * DcScaleFactorTable;
Q_LIST_ENTRY * UVDcScaleFactorTable;
/* Notes on setup of quantisers. The initial multiplication by
the scale factor is done in the ogg_int32_t domain to ensure that the
precision in the quantiser is the same as in the inverse
quantiser where all calculations are integer. The "<< 2" is a
normalisation factor for the forward DCT transform. */
/* New version rounding and ZB characteristics. */
Inter_coeffs = Inter_coeffsV1;
Y_coeffs = Y_coeffsV1;
UV_coeffs = UV_coeffsV1;
DcScaleFactorTable = DcScaleFactorTableV1;
/* Chroma DC uses the same scale table as luma DC. */
UVDcScaleFactorTable = DcScaleFactorTableV1;
/* Default only: every case of the switch below assigns ZBinFactor again. */
ZBinFactor = 0.9;
switch(cpi->pb.info.sharpness){
case 0:
ZBinFactor = 0.65;
if ( scale_factor <= 50 )
RoundingFactor = 0.499;
else
RoundingFactor = 0.46;
break;
case 1:
ZBinFactor = 0.75;
if ( scale_factor <= 50 )
RoundingFactor = 0.476;
else
RoundingFactor = 0.400;
break;
default:
ZBinFactor = 0.9;
if ( scale_factor <= 50 )
RoundingFactor = 0.476;
else
RoundingFactor = 0.333;
break;
/* NOTE(review): neither the switch above nor init_quantizer is closed
   before the next function starts; the lines from here down to the
   "Use fixed multiplier for intra Y DC" comment belong to
   oc_dequant_tables_init - confirm against the real file. */
/*Initializes the dequantization tables from a set of quantizer info.
Currently the dequantizer (and elsewhere enquantizer) tables are expected to
be initialized as pointing to the storage reserved for them in the
oc_theora_state (resp. oc_enc_ctx) structure.
If some tables are duplicates of others, the pointers will be adjusted to
point to a single copy of the tables, but the storage for them will not be
freed.
If you're concerned about the memory footprint, the obvious thing to do is
to move the storage out of its fixed place in the structures and allocate
it on demand.
However, a much, much better option is to only store the quantization
matrices being used for the current frame, and to recalculate these as the
qi values change between frames (this is what VP3 did).*/
void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
int _pp_dc_scale[64],const th_quant_info *_qinfo){
/*Coding mode: intra or inter.*/
int qti;
/*Y', C_b, C_r*/
int pli;
for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
/*Quality index.*/
int qi;
/*Range iterator.*/
int qri;
/*qri runs one step past the last range (<=) so that the final base matrix
is emitted too; that extra step covers exactly one qi (see qi_end below).*/
for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
th_quant_base base;
ogg_uint32_t q;
int qi_start;
int qi_end;
memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
sizeof(base));
qi_start=qi;
if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
/*Iterate over quality indices in this range.*/
for(;;){
ogg_uint32_t qfac;
int zzi;
int ci;
/*In the original VP3.2 code, the rounding offset and the size of the
dead zone around 0 were controlled by a "sharpness" parameter.
The size of our dead zone is now controlled by the per-coefficient
quality thresholds returned by our HVS module.
We round down from a more accurate value when the quality of the
reconstruction does not fall below our threshold and it saves bits.
Hence, all of that VP3.2 code is gone from here, and the remaining
floating point code has been implemented as equivalent integer code
with exact precision.*/
qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
/*For postprocessing, not dequantization.*/
if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
/*Scale the DC coefficient from the proper table.*/
q=(qfac/100)<<2;
q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
_dequant[qi][pli][qti][0]=(ogg_uint16_t)q;
/*Now scale AC coefficients from the proper table.*/
for(zzi=1;zzi<64;zzi++){
q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2;
q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
_dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q;
}
/*If this is a duplicate of a previous matrix, use that instead.
This simple check helps us improve cache coherency later.*/
{
int dupe;
int qtj;
int plj;
dupe=0;
/*Only tables filled earlier for this qi are compared: all three planes of
earlier coding modes, then the earlier planes of the current mode.*/
for(qtj=0;qtj<=qti;qtj++){
for(plj=0;plj<(qtj<qti?3:pli);plj++){
if(!memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
sizeof(oc_quant_table))){
dupe=1;
break;
}
}
if(dupe)break;
}
if(dupe)_dequant[qi][pli][qti]=_dequant[qi][plj][qtj];
}
if(++qi>=qi_end)break;
/*Interpolate the next base matrix.*/
/*Linear blend of base matrices qri and qri+1, weighted by the distance of
qi from each end of the range; adding sizes[qri] before dividing by
2*sizes[qri] rounds to nearest.*/
for(ci=0;ci<64;ci++){
base[ci]=(unsigned char)(
(2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
(qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+_qinfo->qi_ranges[qti][pli].sizes[qri])/
(2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
}
}
}
/* NOTE(review): oc_dequant_tables_init is not closed here; the lines below
   reference cpi-era locals (pbi, RoundingFactor, ZBinFactor, ...) and so
   appear to resume the body of init_quantizer - confirm against the real
   file. */
/* Use fixed multiplier for intra Y DC */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(DcScaleFactorTable[QIndex] * Y_coeffs[0])/100) << 2);
if ( temp_fp_quant_coeffs < MIN_LEGAL_QUANT_ENTRY * 2 )
temp_fp_quant_coeffs = MIN_LEGAL_QUANT_ENTRY * 2;
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_Y_round[0] = (ogg_int32_t) (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_Y[0] = (ogg_int32_t) (0.5 + temp_fp_ZeroBinSize);
/* From here on temp_fp_quant_coeffs holds the reciprocal of the quantizer. */
temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_Y_coeffs[0] = (0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* Intra UV */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(UVDcScaleFactorTable[QIndex] * UV_coeffs[0])/100) << 2);
if ( temp_fp_quant_coeffs < MIN_LEGAL_QUANT_ENTRY * 2)
temp_fp_quant_coeffs = MIN_LEGAL_QUANT_ENTRY * 2;
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_UV_round[0] = (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_UV[0] = (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_UV_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* Inter Y */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(DcScaleFactorTable[QIndex] * Inter_coeffs[0])/100) << 2);
if ( temp_fp_quant_coeffs < MIN_LEGAL_QUANT_ENTRY * 4)
temp_fp_quant_coeffs = MIN_LEGAL_QUANT_ENTRY * 4;
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_Inter_round[0]= (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_Inter[0]= (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_Inter_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* Inter UV */
/* Uses the same Inter_coeffs matrix and the same DC scale table as Inter Y,
   so the Inter UV DC entries come out equal to the Inter Y ones. */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(UVDcScaleFactorTable[QIndex] * Inter_coeffs[0])/100) << 2);
if ( temp_fp_quant_coeffs < MIN_LEGAL_QUANT_ENTRY * 4)
temp_fp_quant_coeffs = MIN_LEGAL_QUANT_ENTRY * 4;
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_InterUV_round[0]= (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_InterUV[0]= (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_InterUV_coeffs[0]=
(0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* AC entries: same recipe as DC, but scaled by scale_factor and with
   smaller lower bounds (MIN_LEGAL_QUANT_ENTRY for intra, twice that for
   inter). */
for ( i = 1; i < 64; i++ ){
/* now scale coefficients by required compression factor */
/* Intra Y */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(scale_factor * Y_coeffs[i]) / 100 ) << 2 );
if ( temp_fp_quant_coeffs < (MIN_LEGAL_QUANT_ENTRY) )
temp_fp_quant_coeffs = (MIN_LEGAL_QUANT_ENTRY);
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_Y_round[i] = (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_Y[i] = (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_Y_coeffs[i] = (0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* Intra UV */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(scale_factor * UV_coeffs[i]) / 100 ) << 2 );
if ( temp_fp_quant_coeffs < (MIN_LEGAL_QUANT_ENTRY))
temp_fp_quant_coeffs = (MIN_LEGAL_QUANT_ENTRY);
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_UV_round[i] = (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_UV[i] = (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_UV_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* Inter Y */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(scale_factor * Inter_coeffs[i]) / 100 ) << 2 );
if ( temp_fp_quant_coeffs < (MIN_LEGAL_QUANT_ENTRY * 2) )
temp_fp_quant_coeffs = (MIN_LEGAL_QUANT_ENTRY * 2);
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_Inter_round[i]= (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_Inter[i]= (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_Inter_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
/* Inter UV */
temp_fp_quant_coeffs =
(((ogg_uint32_t)(scale_factor * Inter_coeffs[i]) / 100 ) << 2 );
if ( temp_fp_quant_coeffs < (MIN_LEGAL_QUANT_ENTRY * 2) )
temp_fp_quant_coeffs = (MIN_LEGAL_QUANT_ENTRY * 2);
temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
pbi->fp_quant_InterUV_round[i]= (0.5 + temp_fp_quant_round);
temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
pbi->fp_ZeroBinSize_InterUV[i]= (0.5 + temp_fp_ZeroBinSize);
temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
pbi->fp_quant_InterUV_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
}
/* Only the coefficient pointer is selected here; fquant_round and
   fquant_ZbSize are left untouched by this function. */
pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs;
}
/* Makes the Intra Y tables the active forward-quantizer tables of pbi:
   zero-bin sizes, rounding offsets and reciprocal multipliers. */
void select_Y_quantiser ( PB_INSTANCE *pbi ){
  pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Y;
  pbi->fquant_round  = pbi->fp_quant_Y_round;
  pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs;
}
/* Makes the Inter Y tables the active forward-quantizer tables of pbi:
   zero-bin sizes, rounding offsets and reciprocal multipliers. */
void select_Inter_quantiser ( PB_INSTANCE *pbi ){
  pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter;
  pbi->fquant_round  = pbi->fp_quant_Inter_round;
  pbi->fquant_coeffs = pbi->fp_quant_Inter_coeffs;
}
/* Makes the Intra UV tables the active forward-quantizer tables of pbi:
   reciprocal multipliers, rounding offsets and zero-bin sizes.
   The zero-bin pointer must reference fp_ZeroBinSize_UV, mirroring what the
   Y, Inter and InterUV selectors do with their own zero-bin tables.  It
   used to be set to fp_quant_UV_round, so quantize() compared chroma
   coefficients against the rounding offsets instead of the zero-bin
   sizes. */
void select_UV_quantiser ( PB_INSTANCE *pbi ){
  pbi->fquant_coeffs = pbi->fp_quant_UV_coeffs;
  pbi->fquant_round = pbi->fp_quant_UV_round;
  pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_UV;
}
/* Makes the Inter UV tables the active forward-quantizer tables of pbi:
   zero-bin sizes, rounding offsets and reciprocal multipliers. */
void select_InterUV_quantiser ( PB_INSTANCE *pbi ){
  pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_InterUV;
  pbi->fquant_round  = pbi->fp_quant_InterUV_round;
  pbi->fquant_coeffs = pbi->fp_quant_InterUV_coeffs;
}
/* Quantizes one block of DCT coefficients with the currently selected
   tables of pbi (fquant_round, fquant_coeffs, fquant_ZbSize).
   DCT_block is walked VFRAGPIXELS rows at a time, 8 coefficients per row.
   quantized_list receives 64 entries: it is cleared first, and the result
   for input position k is written to quantized_list[quant_index[k]].
   A coefficient whose magnitude is below its zero-bin size is left at 0.
   Otherwise the rounding offset is added (away from zero), the sum is
   multiplied by the reciprocal multiplier and shifted right by 16, and the
   result is limited to 511 (positive branch) or -511 (negative branch). */
void quantize( PB_INSTANCE *pbi,
               ogg_int16_t * DCT_block,
               Q_LIST_ENTRY * quantized_list){
  ogg_int32_t  *round_tab = pbi->fquant_round;
  ogg_int32_t  *recip_tab = pbi->fquant_coeffs;
  ogg_int32_t  *zbin_tab  = pbi->fquant_ZbSize;
  ogg_int16_t  *coeff     = DCT_block;
  ogg_uint32_t *out_pos   = (ogg_uint32_t *)pbi->quant_index;
  ogg_uint32_t  row;
  int           col;
  ogg_int32_t   scaled;
  Q_LIST_ENTRY  level;

  /* Anything that falls inside the zero bin keeps this default of 0. */
  memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );

  for( row = 0; row < VFRAGPIXELS; row++ ){
    for( col = 0; col < 8; col++ ){
      if ( coeff[col] >= zbin_tab[col] ) {
        /* Positive side: adding the rounding offset before scaling rounds
           the quotient. */
        scaled = recip_tab[col] * ( coeff[col] + round_tab[col] );
        level = (Q_LIST_ENTRY) (scaled>>16);
        quantized_list[out_pos[col]] = ( level > 511 ) ? 511 : level;
      } else if ( coeff[col] <= -zbin_tab[col] ) {
        /* Negative side: MIN16 is added before the shift, presumably so
           that the right shift truncates toward zero. */
        scaled = recip_tab[col] * ( coeff[col] - round_tab[col] ) + MIN16;
        level = (Q_LIST_ENTRY) (scaled>>16);
        quantized_list[out_pos[col]] = ( level < -511 ) ? -511 : level;
      }
    }

    /* Step every table to the next row of 8 entries. */
    coeff     += 8;
    out_pos   += 8;
    zbin_tab  += 8;
    recip_tab += 8;
    round_tab += 8;
  }
}
/* Builds the four dequantizer tables of pbi (Intra Y, Intra UV, Inter Y,
   Inter UV) from the base matrices stored in pbi.
   Steps:
     1. rebuild pbi->quant_index via BuildQuantIndex_Generic();
     2. copy every base matrix entry src to position quant_index[src] of
        its dequant table (Inter UV reuses the Inter matrix);
     3. scale entry 0 (DC) by DcScaleFactorTable[QIndex]/100 and entries
        1..63 (AC) by scale_factor/100, raise each to its minimum
        (a multiple of MIN_DEQUANT_VAL), then shift left by
        IDCT_SCALE_FACTOR.
   On exit pbi->dequant_coeffs points at the Intra Y table. */
static void init_dequantizer ( PB_INSTANCE *pbi,
                               ogg_uint32_t scale_factor,
                               unsigned char QIndex ){
  int src, dst;
  int k;
  Q_LIST_ENTRY *inter_base  = pbi->Inter_coeffs;
  Q_LIST_ENTRY *y_base      = pbi->Y_coeffs;
  Q_LIST_ENTRY *uv_base     = pbi->UV_coeffs;
  Q_LIST_ENTRY *dc_scale    = pbi->DcScaleFactorTable;
  /* Chroma DC shares the luma DC scale table. */
  Q_LIST_ENTRY *uv_dc_scale = pbi->DcScaleFactorTable;

  /* The decoder side orders coefficients differently from the encoder
     side, so derive quant_index from dequant_index first. */
  BuildQuantIndex_Generic(pbi);

  /* Scatter the base matrices into their reordered positions. */
  for ( src = 0; src < BLOCK_SIZE; src++ ){
    dst = pbi->quant_index[src];
    pbi->dequant_Y_coeffs[dst] = y_base[src];
    pbi->dequant_Inter_coeffs[dst] = inter_base[src];
    pbi->dequant_UV_coeffs[dst] = uv_base[src];
    pbi->dequant_InterUV_coeffs[dst] = inter_base[src];
  }

  /* DC, Intra Y */
  pbi->dequant_Y_coeffs[0] =
    ((dc_scale[QIndex] * pbi->dequant_Y_coeffs[0])/100);
  if ( pbi->dequant_Y_coeffs[0] < MIN_DEQUANT_VAL * 2 ){
    pbi->dequant_Y_coeffs[0] = MIN_DEQUANT_VAL * 2;
  }
  pbi->dequant_Y_coeffs[0] <<= IDCT_SCALE_FACTOR;

  /* DC, Intra UV */
  pbi->dequant_UV_coeffs[0] =
    ((uv_dc_scale[QIndex] * pbi->dequant_UV_coeffs[0])/100);
  if ( pbi->dequant_UV_coeffs[0] < MIN_DEQUANT_VAL * 2 ){
    pbi->dequant_UV_coeffs[0] = MIN_DEQUANT_VAL * 2;
  }
  pbi->dequant_UV_coeffs[0] <<= IDCT_SCALE_FACTOR;

  /* DC, Inter Y */
  pbi->dequant_Inter_coeffs[0] =
    ((dc_scale[QIndex] * pbi->dequant_Inter_coeffs[0])/100);
  if ( pbi->dequant_Inter_coeffs[0] < MIN_DEQUANT_VAL * 4 ){
    pbi->dequant_Inter_coeffs[0] = MIN_DEQUANT_VAL * 4;
  }
  pbi->dequant_Inter_coeffs[0] <<= IDCT_SCALE_FACTOR;

  /* DC, Inter UV */
  pbi->dequant_InterUV_coeffs[0] =
    ((uv_dc_scale[QIndex] * pbi->dequant_InterUV_coeffs[0])/100);
  if ( pbi->dequant_InterUV_coeffs[0] < MIN_DEQUANT_VAL * 4 ){
    pbi->dequant_InterUV_coeffs[0] = MIN_DEQUANT_VAL * 4;
  }
  pbi->dequant_InterUV_coeffs[0] <<= IDCT_SCALE_FACTOR;

  /* AC entries: scale by the requested compression factor. */
  for ( k = 1; k < 64; k++ ){
    /* Intra Y */
    pbi->dequant_Y_coeffs[k] =
      (( scale_factor * pbi->dequant_Y_coeffs[k] ) / 100);
    if ( pbi->dequant_Y_coeffs[k] < MIN_DEQUANT_VAL ){
      pbi->dequant_Y_coeffs[k] = MIN_DEQUANT_VAL;
    }
    pbi->dequant_Y_coeffs[k] <<= IDCT_SCALE_FACTOR;

    /* Intra UV */
    pbi->dequant_UV_coeffs[k] =
      (( scale_factor * pbi->dequant_UV_coeffs[k] ) / 100);
    if ( pbi->dequant_UV_coeffs[k] < MIN_DEQUANT_VAL ){
      pbi->dequant_UV_coeffs[k] = MIN_DEQUANT_VAL;
    }
    pbi->dequant_UV_coeffs[k] <<= IDCT_SCALE_FACTOR;

    /* Inter Y */
    pbi->dequant_Inter_coeffs[k] =
      (( scale_factor * pbi->dequant_Inter_coeffs[k] ) / 100);
    if ( pbi->dequant_Inter_coeffs[k] < (MIN_DEQUANT_VAL * 2) ){
      pbi->dequant_Inter_coeffs[k] = MIN_DEQUANT_VAL * 2;
    }
    pbi->dequant_Inter_coeffs[k] <<= IDCT_SCALE_FACTOR;

    /* Inter UV */
    pbi->dequant_InterUV_coeffs[k] =
      (( scale_factor * pbi->dequant_InterUV_coeffs[k] ) / 100);
    if ( pbi->dequant_InterUV_coeffs[k] < (MIN_DEQUANT_VAL * 2) ){
      pbi->dequant_InterUV_coeffs[k] = MIN_DEQUANT_VAL * 2;
    }
    pbi->dequant_InterUV_coeffs[k] <<= IDCT_SCALE_FACTOR;
  }

  pbi->dequant_coeffs = pbi->dequant_Y_coeffs;
}
/* Switches the playback instance pbi to quality value NewQ.
   The scale passed to the dequantizer is NewQ limited to the range
   [QThreshTable[Q_TABLE_SIZE-1], QThreshTable[0]].
   pbi->FrameQIndex becomes the highest index whose threshold is >= NewQ
   (the unclamped value), or 0 when no index above 0 qualifies.
   The dequantizer tables are then rebuilt for that scale and index. */
void UpdateQ( PB_INSTANCE *pbi, ogg_uint32_t NewQ ){
  ogg_uint32_t clamped_q = NewQ;

  /* Keep the scale inside the span covered by the threshold table. */
  if ( clamped_q < pbi->QThreshTable[Q_TABLE_SIZE-1] ){
    clamped_q = pbi->QThreshTable[Q_TABLE_SIZE-1];
  } else if ( clamped_q > pbi->QThreshTable[0] ){
    clamped_q = pbi->QThreshTable[0];
  }

  /* Walk down from the last index until a threshold reaches NewQ or
     index 0 is hit. */
  pbi->FrameQIndex = Q_TABLE_SIZE - 1;
  while ( (ogg_int32_t) pbi->FrameQIndex > 0 &&
          pbi->QThreshTable[pbi->FrameQIndex] < NewQ ){
    pbi->FrameQIndex --;
  }

  /* Rebuild the inverse-transform q tables. */
  init_dequantizer ( pbi, clamped_q, (unsigned char) pbi->FrameQIndex );
}
/* Encoder-side counterpart of UpdateQ(): switches cpi (and its embedded
   playback instance cpi->pb) to quality value NewQ.
   The scale is NewQ limited to the range
   [QThreshTable[Q_TABLE_SIZE-1], QThreshTable[0]].
   FrameQIndex becomes the highest index whose threshold is >= NewQ (the
   unclamped value), or 0 when no index above 0 qualifies.
   Both the forward quantizer and the dequantizer tables are then rebuilt,
   in that order. */
void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ ){
  PB_INSTANCE *pbi = &cpi->pb;
  ogg_uint32_t clamped_q = NewQ;

  /* Keep the scale inside the span covered by the threshold table. */
  if ( clamped_q < pbi->QThreshTable[Q_TABLE_SIZE-1] ){
    clamped_q = pbi->QThreshTable[Q_TABLE_SIZE-1];
  } else if ( clamped_q > pbi->QThreshTable[0] ){
    clamped_q = pbi->QThreshTable[0];
  }

  /* Walk down from the last index until a threshold reaches NewQ or
     index 0 is hit. */
  pbi->FrameQIndex = Q_TABLE_SIZE - 1;
  while ( (ogg_int32_t) pbi->FrameQIndex > 0 &&
          pbi->QThreshTable[pbi->FrameQIndex] < NewQ ){
    pbi->FrameQIndex --;
  }

  /* Rebuild the q tables for the forward and the reverse transform. */
  init_quantizer ( cpi, clamped_q, (unsigned char) pbi->FrameQIndex );
  init_dequantizer ( pbi, clamped_q, (unsigned char) pbi->FrameQIndex );
}

View File

@ -0,0 +1,33 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#if !defined(_quant_H)
# define _quant_H (1)
# include "theora/codec.h"
# include "ocintrin.h"
/*One quantization table: 64 unsigned 16-bit quantizer values, one per
coefficient of a block.*/
typedef ogg_uint16_t oc_quant_table[64];
/*Maximum scaled quantizer value.*/
#define OC_QUANT_MAX (1024<<2)
/*Fills the dequantization tables from _qinfo.
_dequant: Table pointers indexed as [qi][pli][qti] (64 quality indices,
3 planes, 2 coding modes); each must already point at storage for
64 values.
_pp_dc_scale: If not NULL, receives one value per quality index.
_qinfo: The quantizer info to build the tables from.*/
void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
int _pp_dc_scale[64],const th_quant_info *_qinfo);
#endif

View File

@ -1,37 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: quant_lookup.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* Largest value representable in 16 bits: (1<<16)-1. */
#define MIN16 ((1<<16)-1)
/* Scale applied to reciprocal quantizer values: 1<<16. */
#define SHIFT16 (1<<16)
/* Base lower bound for forward quantizer entries. */
#define MIN_LEGAL_QUANT_ENTRY 8
/* Base lower bound for dequantizer entries (applied before the
   IDCT_SCALE_FACTOR shift). */
#define MIN_DEQUANT_VAL 2
#define IDCT_SCALE_FACTOR 2 /* Shift left bits to improve IDCT precision */
/* NOTE(review): no use of OLD_SCHEME is visible in the surrounding code;
   confirm whether it is still needed. */
#define OLD_SCHEME 1
/* A permutation of 0..63.
   The sequence (0, 1, 8, 16, 9, 2, ...) matches the conventional 8x8
   zig-zag scan, i.e. entry k looks like the row-major index of the k-th
   zig-zag position - confirm against the users of this table. */
static ogg_uint32_t dequant_index[64] = {
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
};

File diff suppressed because it is too large Load Diff

View File

@ -1,85 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: reconstruct.c,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* Reconstructs an intra block: every output sample is the residual from
   ChangePtr plus 128, passed through clamp255().
   BLOCK_HEIGHT_WIDTH rows of 8 samples are written.  Rows of ReconPtr are
   LineStep apart; rows of ChangePtr are BLOCK_HEIGHT_WIDTH apart.
   pbi is not used. */
void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr,
                 ogg_int16_t * ChangePtr, ogg_uint32_t LineStep ) {
  ogg_uint32_t row;
  int col;

  for ( row = 0; row < BLOCK_HEIGHT_WIDTH; row++ ){
    /* Re-bias the signed residual and saturate it to an 8 bit unsigned
       sample. */
    for ( col = 0; col < 8; col++ ){
      ReconPtr[col] = clamp255( ChangePtr[col] + 128 );
    }
    ChangePtr += BLOCK_HEIGHT_WIDTH;
    ReconPtr += LineStep;
  }
}
/* Reconstructs an inter block: every output sample is the reference sample
   from RefPtr plus the residual from ChangePtr, passed through clamp255().
   BLOCK_HEIGHT_WIDTH rows of 8 samples are written.  Rows of ReconPtr and
   RefPtr are LineStep apart; rows of ChangePtr are BLOCK_HEIGHT_WIDTH
   apart.
   pbi is not used. */
void ReconInter( PB_INSTANCE *pbi, unsigned char * ReconPtr,
                 unsigned char * RefPtr, ogg_int16_t * ChangePtr,
                 ogg_uint32_t LineStep ) {
  ogg_uint32_t row;
  int col;

  for ( row = 0; row < BLOCK_HEIGHT_WIDTH; row++ ){
    for ( col = 0; col < 8; col++ ){
      ReconPtr[col] = clamp255(RefPtr[col] + ChangePtr[col]);
    }
    RefPtr += LineStep;
    ReconPtr += LineStep;
    ChangePtr += BLOCK_HEIGHT_WIDTH;
  }
}
/* Reconstructs an inter block predicted from two references: every output
   sample is the truncated average (sum >> 1) of the samples from RefPtr1
   and RefPtr2, plus the residual from ChangePtr, passed through
   clamp255().
   BLOCK_HEIGHT_WIDTH rows of 8 samples are written.  Rows of ReconPtr,
   RefPtr1 and RefPtr2 are LineStep apart; rows of ChangePtr are
   BLOCK_HEIGHT_WIDTH apart.
   pbi is not used. */
void ReconInterHalfPixel2( PB_INSTANCE *pbi, unsigned char * ReconPtr,
                           unsigned char * RefPtr1, unsigned char * RefPtr2,
                           ogg_int16_t * ChangePtr, ogg_uint32_t LineStep ) {
  ogg_uint32_t row;
  int col;

  for ( row = 0; row < BLOCK_HEIGHT_WIDTH; row++ ){
    for ( col = 0; col < 8; col++ ){
      ReconPtr[col] = clamp255((((int)RefPtr1[col] + (int)RefPtr2[col]) >> 1) + ChangePtr[col] );
    }
    RefPtr2 += LineStep;
    RefPtr1 += LineStep;
    ReconPtr += LineStep;
    ChangePtr += BLOCK_HEIGHT_WIDTH;
  }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,591 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
********************************************************************/
/**\mainpage
*
* \section intro Introduction
*
* This is the documentation for the <tt>libtheora</tt> C API.
* <tt>libtheora</tt> is the current reference
* implementation for <a href="http://www.theora.org/">Theora</a>, a free,
* patent-unencumbered video codec.
* Theora is derived from On2's VP3 codec with additional features and
* integration with Ogg multimedia formats by
* <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
* Complete documentation of the format itself is available in
* <a href="http://www.theora.org/doc/Theora.pdf">the Theora
* specification</a>.
*
* \subsection Organization
*
* The functions documented here are actually subdivided into three
* separate libraries:
* - <tt>libtheoraenc</tt> contains the encoder interface,
* described in \ref encfuncs.
* - <tt>libtheoradec</tt> contains the decoder interface and
* routines shared with the encoder.
* You must also link to this if you link to <tt>libtheoraenc</tt>.
* The routines in this library are described in \ref decfuncs and
* \ref basefuncs.
* - <tt>libtheora</tt> contains the \ref oldfuncs.
*
* New code should link to <tt>libtheoradec</tt> and, if using encoder
* features, <tt>libtheoraenc</tt>. Together these two export both
* the standard and the legacy API, so this is all that is needed by
* any code. The older <tt>libtheora</tt> library is provided just for
* compatibility with older build configurations.
*
* In general the recommended 1.x API symbols can be distinguished
* by their <tt>th_</tt> or <tt>TH_</tt> namespace prefix.
* The older, legacy API uses <tt>theora_</tt> or <tt>OC_</tt>
* prefixes instead.
*/
/**\file
* The shared <tt>libtheoradec</tt> and <tt>libtheoraenc</tt> C API.
* You don't need to include this directly.*/
#if !defined(_O_THEORA_CODEC_H_)
# define _O_THEORA_CODEC_H_ (1)
# include <ogg/ogg.h>
#if defined(__cplusplus)
extern "C" {
#endif
/**\name Return codes*/
/*@{*/
/**An invalid pointer was provided.*/
#define TH_EFAULT (-1)
/**An invalid argument was provided.*/
#define TH_EINVAL (-10)
/**The contents of the header were incomplete, invalid, or unexpected.*/
#define TH_EBADHEADER (-20)
/**The header does not belong to a Theora stream.*/
#define TH_ENOTFORMAT (-21)
/**The bitstream version is too high.*/
#define TH_EVERSION (-22)
/**The specified function is not implemented.*/
#define TH_EIMPL (-23)
/**There were errors in the video data packet.*/
#define TH_EBADPACKET (-24)
/**The decoded packet represented a dropped frame.
The player can continue to display the current frame, as the contents of the
decoded frame buffer have not changed.*/
#define TH_DUPFRAME (1)
/*@}*/
/**The currently defined color space tags.
* See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
* specification</a>, Chapter 4, for exact details on the meaning
* of each of these color spaces.
* The enumerators carry no explicit values, so they are numbered
* consecutively from 0 in declaration order; #TH_CS_NSPACES is therefore
* equal to the number of tags declared before it.*/
typedef enum{
/**The color space was not specified at the encoder.
It may be conveyed by an external means.*/
TH_CS_UNSPECIFIED,
/**A color space designed for NTSC content.*/
TH_CS_ITU_REC_470M,
/**A color space designed for PAL/SECAM content.*/
TH_CS_ITU_REC_470BG,
/**The total number of currently defined color spaces.*/
TH_CS_NSPACES
}th_colorspace;
/**The currently defined pixel format tags.
* See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
* specification</a>, Section 4.4, for details on the precise sample
* locations.
* The enumerators carry no explicit values, so they are numbered
* consecutively from 0 in declaration order; the reserved tag keeps its
* slot, and #TH_PF_NFORMATS counts it as well.*/
typedef enum{
/**Chroma decimation by 2 in both the X and Y directions (4:2:0).
The Cb and Cr chroma planes are half the width and half the
height of the luma plane.*/
TH_PF_420,
/**Currently reserved.*/
TH_PF_RSVD,
/**Chroma decimation by 2 in the X direction (4:2:2).
The Cb and Cr chroma planes are half the width of the luma plane, but full
height.*/
TH_PF_422,
/**No chroma decimation (4:4:4).
The Cb and Cr chroma planes are full width and full height.*/
TH_PF_444,
/**The total number of currently defined pixel formats.*/
TH_PF_NFORMATS
}th_pixel_fmt;
/**A buffer for a single color plane in an uncompressed image.
* This contains the image data in a left-to-right, top-down format.
* Each row of pixels is stored contiguously in memory, but successive
* rows need not be.
* Use \a stride to compute the offset of the next row: row <tt>y</tt>
* begins at <tt>data+y*stride</tt>.
* The encoder accepts both positive \a stride values (top-down in memory)
* and negative (bottom-up in memory).
* The decoder currently always generates images with positive strides.*/
typedef struct{
/**The width of this plane.*/
int width;
/**The height of this plane.*/
int height;
/**The offset in bytes between successive rows.
This may be negative (see above), so it is a signed value.*/
int stride;
/**A pointer to the beginning of the first row.*/
unsigned char *data;
}th_img_plane;
/**A complete image buffer for an uncompressed frame.
* The chroma planes may be decimated by a factor of two in either
* direction, as indicated by th_info#pixel_fmt.
* The width and height of the Y' plane must be multiples of 16.
* They may need to be cropped for display, using the rectangle
* specified by th_info#pic_x, th_info#pic_y, th_info#pic_width,
* and th_info#pic_height.
* All samples are 8 bits.
* \note The term YUV often used to describe a colorspace is ambiguous.
* The exact parameters of the RGB to YUV conversion process aside, in
* many contexts the U and V channels actually have opposite meanings.
* To avoid this confusion, we are explicit: the name of the color
* channels are Y'CbCr, and they appear in that order, always.
* The prime symbol denotes that the Y channel is non-linear.
* Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
typedef th_img_plane th_ycbcr_buffer[3];
/**Theora bitstream information.
* This contains the basic playback parameters for a stream, and corresponds to
* the initial 'info' header packet.
* To initialize an encoder, the application fills in this structure and
* passes it to th_encode_alloc().
* A default encoding mode is chosen based on the values of the #quality and
* #target_bitrate fields.
* On decode, it is filled in by th_decode_headerin(), and then passed to
* th_decode_alloc().
*
* Encoded Theora frames must be a multiple of 16 in both width and height;
* this is what the #frame_width and #frame_height members represent.
* To handle arbitrary picture sizes, a crop rectangle is specified in the
* #pic_x, #pic_y, #pic_width and #pic_height members.
*
* All frame buffers contain pointers to the full, padded frame.
* However, the current encoder <em>will not</em> reference pixels outside of
* the cropped picture region, and the application does not need to fill them
* in.
* The decoder <em>will</em> allocate storage for a full frame, but the
* application <em>should not</em> rely on the padding containing sensible
* data.
*
* It is also generally recommended that the offsets and sizes should still be
* multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
* See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
* specification</a>, Section 4.4, for more details.
*
* Frame rate, in frames per second, is stored as a rational fraction, as is
* the pixel aspect ratio.
* Note that this refers to the aspect ratio of the individual pixels, not of
* the overall frame itself.
* The frame aspect ratio can be computed from pixel aspect ratio using the
* image dimensions.*/
typedef struct{
/**\name Theora version
* Bitstream version information.*/
/*@{*/
unsigned char version_major;
unsigned char version_minor;
unsigned char version_subminor;
/*@}*/
/**The encoded frame width.
* This must be a multiple of 16, and less than 1048576.*/
ogg_uint32_t frame_width;
/**The encoded frame height.
* This must be a multiple of 16, and less than 1048576.*/
ogg_uint32_t frame_height;
/**The displayed picture width.
* This must be no larger than #frame_width.*/
ogg_uint32_t pic_width;
/**The displayed picture height.
* This must be no larger than #frame_height.*/
ogg_uint32_t pic_height;
/**The X offset of the displayed picture.
* This must be no larger than #frame_width-#pic_width or 255, whichever is
* smaller.*/
ogg_uint32_t pic_x;
/**The Y offset of the displayed picture.
* This must be no larger than #frame_height-#pic_height, and
* #frame_height-#pic_height-#pic_y must be no larger than 255.
* This slightly funny restriction is due to the fact that the offset is
* specified from the top of the image for consistency with the standard
* graphics left-handed coordinate system used throughout this API, while
* it is stored in the encoded stream as an offset from the bottom.*/
ogg_uint32_t pic_y;
/**\name Frame rate
* The frame rate, as a fraction.
* If either is 0, the frame rate is undefined.*/
/*@{*/
ogg_uint32_t fps_numerator;
ogg_uint32_t fps_denominator;
/*@}*/
/**\name Aspect ratio
* The aspect ratio of the pixels.
* If either value is zero, the aspect ratio is undefined.
* If not specified by any external means, 1:1 should be assumed.
* The aspect ratio of the full picture can be computed as
* \code
* aspect_numerator*pic_width/(aspect_denominator*pic_height).
* \endcode */
/*@{*/
ogg_uint32_t aspect_numerator;
ogg_uint32_t aspect_denominator;
/*@}*/
/**The color space.*/
th_colorspace colorspace;
/**The pixel format.*/
th_pixel_fmt pixel_fmt;
/**The target bit-rate in bits per second.
If initializing an encoder with this struct, set this field to a non-zero
value to activate CBR encoding by default.*/
int target_bitrate;
/**The target quality level.
Valid values range from 0 to 63, inclusive, with higher values giving
higher quality.
If initializing an encoder with this struct, and #target_bitrate is set
to zero, VBR encoding at this quality will be activated by default.*/
/*Currently this is set so that a qi of 0 corresponds to distortions of 24
times the JND, and each increase by 16 halves that value.
This gives us fine discrimination at low qualities, yet effective rate
control at high qualities.
The qi value 63 is special, however.
For this, the highest quality, we use one half of a JND for our threshold.
Due to the lower bounds placed on allowable quantizers in Theora, we will
not actually be able to achieve quality this good, but this should
provide as close to visually lossless quality as Theora is capable of.
We could lift the quantizer restrictions without breaking VP3.1
compatibility, but this would result in quantized coefficients that are
too large for the current bitstream to be able to store.
We'd have to redesign the token syntax to store these large coefficients,
which would make transcoding complex.*/
int quality;
/**The amount to shift to extract the last keyframe number from the granule
* position.
* This can be at most 31.
* th_info_init() will set this to a default value (currently <tt>6</tt>,
* which is good for streaming applications), but you can set it to 0 to
* make every frame a keyframe.
* The maximum distance between key frames is
* <tt>1<<#keyframe_granule_shift</tt>.
* The keyframe frequency can be more finely controlled with
* #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted
* during encoding (for example, to force the next frame to be a keyframe),
* but it cannot be set larger than the amount permitted by this field after
* the headers have been output.*/
int keyframe_granule_shift;
}th_info;
/**The comment information.
*
* This structure holds the in-stream metadata corresponding to
* the 'comment' header packet.
* The comment header is meant to be used much like someone jotting a quick
* note on the label of a video.
* It should be a short, to the point text note that can be more than a couple
* words, but not more than a short paragraph.
*
* The metadata is stored as a series of (tag, value) pairs, in
* length-encoded string vectors.
* The first occurrence of the '=' character delimits the tag and value.
* A particular tag may occur more than once, and order is significant.
* The character set encoding for the strings is always UTF-8, but the tag
* names are limited to ASCII, and treated as case-insensitive.
* See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
* specification</a>, Section 6.3.3 for details.
*
* In filling in this structure, th_decode_headerin() will null-terminate
* the user_comment strings for safety.
* However, the bitstream format itself treats them as 8-bit clean vectors,
* possibly containing null characters, and so the length array should be
* treated as their authoritative length.
*/
typedef struct th_comment{
/**The array of comment string vectors.
When filled in by th_decode_headerin() each vector is null-terminated for
safety, but a vector may itself contain null characters, so use
#comment_lengths rather than the terminator to find its length.*/
char **user_comments;
/**An array of the corresponding length of each vector, in bytes.
This is the authoritative length of each entry in #user_comments.*/
int *comment_lengths;
/**The total number of comment strings.*/
int comments;
/**The null-terminated vendor string.
This identifies the software used to encode the stream.*/
char *vendor;
}th_comment;
/**A single base matrix.
It holds 64 entries, one per DCT coefficient index \a ci, in the normal 2D
DCT block ordering (row-major, \em not zig-zag) described under
#th_quant_info.*/
typedef unsigned char th_quant_base[64];
/**A set of \a qi ranges.
Each range is bounded by two base matrices; the base matrices for the
\a qi values inside a range are linearly interpolated between those two
endpoints, as described under #th_quant_info.*/
typedef struct{
/**The number of ranges in the set.*/
int nranges;
/**The size of each of the #nranges ranges.
These must sum to 63.*/
const int *sizes;
/**#nranges <tt>+1</tt> base matrices.
Matrices \a i and <tt>i+1</tt> form the endpoints of range \a i.*/
const th_quant_base *base_matrices;
}th_quant_ranges;
/**A complete set of quantization parameters.
The quantizer for each coefficient is calculated as:
\code
Q=MIN(MAX(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100),
1024).
\endcode
That is, the scaled base value is clamped below by the minimum quantizer
and above by 1024.
\a qti is the quantization type index: 0 for intra, 1 for inter.
<tt>ci!=0</tt> is 0 for the DC coefficient and 1 for AC coefficients.
\a qi is the quality index, ranging between 0 (low quality) and 63 (high
quality).
\a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr.
\a ci is the DCT coefficient index.
Coefficient indices correspond to the normal 2D DCT block
ordering--row-major with low frequencies first--\em not zig-zag order.
Minimum quantizers are constant, and are given by:
\code
qmin[2][2]={{4,2},{8,4}}.
\endcode
Parameters that can be stored in the bitstream are as follows:
- The two scale matrices ac_scale and dc_scale.
\code
scale[2][64]={dc_scale,ac_scale}.
\endcode
- The base matrices for each \a qi, \a qti and \a pli (up to 384 in all).
In order to avoid storing a full 384 base matrices, only a sparse set of
matrices are stored, and the rest are linearly interpolated.
This is done as follows.
For each \a qti and \a pli, a series of \a n \a qi ranges is defined.
The size of each \a qi range can vary arbitrarily, but they must sum to
63.
Then, <tt>n+1</tt> matrices are specified, one for each endpoint of the
ranges.
For interpolation purposes, each range's endpoints are the first \a qi
value it contains and one past the last \a qi value it contains.
Fractional values are rounded to the nearest integer, with ties rounded
away from zero.
Base matrices are stored by reference, so if the same matrices are used
multiple times, they will only appear once in the bitstream.
The bitstream is also capable of omitting an entire set of ranges and
its associated matrices if they are the same as either the previous
set (indexed in row-major order) or if the inter set is the same as the
intra set.
- Loop filter limit values.
The same limits are used for the loop filter in all color planes, despite
potentially differing levels of quantization in each.
For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater
than <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must
be no greater than <tt>base[qti][pli][qi-1][ci]</tt>.
These two conditions ensure that the actual quantizer for a given \a qti,
\a pli, and \a ci does not increase as \a qi increases.
This is not required by the decoder.*/
typedef struct{
/**The DC scaling factors.*/
ogg_uint16_t dc_scale[64];
/**The AC scaling factors.*/
ogg_uint16_t ac_scale[64];
/**The loop filter limit values.*/
unsigned char loop_filter_limits[64];
/**The \a qi ranges for each \a qti and \a pli.*/
th_quant_ranges qi_ranges[2][3];
}th_quant_info;
/**The number of Huffman tables used by Theora.
These are organized as five groups of 16 tables (see #th_huff_code).*/
#define TH_NHUFFMAN_TABLES (80)
/**The number of DCT token values in each table.*/
#define TH_NDCT_TOKENS (32)
/**A Huffman code for a Theora DCT token.
* Each set of Huffman codes in a given table must form a complete, prefix-free
* code.
* There is no requirement that all the tokens in a table have a valid code,
* but the current encoder is not optimized to take advantage of this.
* If each of the five groups of 16 tables does not contain at least one table
* with a code for every token, then the encoder may fail to encode certain
* frames.
* The complete table in the first group of 16 does not have to be in the same
* place as the complete table in the other groups, but the complete tables in
* the remaining four groups must all be in the same place.*/
typedef struct{
/**The bit pattern for the code, with the LSbit of the pattern aligned in
* the LSbit of the word.*/
ogg_uint32_t pattern;
/**The number of bits in the code.
* This must be between 0 and 32, inclusive.*/
int nbits;
}th_huff_code;
/**\defgroup basefuncs Functions Shared by Encode and Decode*/
/*@{*/
/**\name Basic shared functions*/
/*@{*/
/**Retrieves a human-readable string to identify the library vendor and
* version.
* \return The version string.
* \see th_version_number()*/
extern const char *th_version_string(void);
/**Retrieves the library version number.
* This is the highest bitstream version that the encoder library will produce,
* or that the decoder library can decode.
* This number is composed of a 16-bit major version, 8-bit minor version
* and 8-bit sub-version, composed as follows:
* \code
* (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR)
* \endcode
* \return The version number.*/
extern ogg_uint32_t th_version_number(void);
/**Converts a granule position to an absolute frame index, starting at
* <tt>0</tt>.
* The granule position is interpreted in the context of a given
* #th_enc_ctx or #th_dec_ctx handle (either will suffice).
* The handle is taken as a <tt>void *</tt> so that either context type can be
* passed.
* \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx
* handle.
* \param _granpos The granule position to convert.
* \return The absolute frame index corresponding to \a _granpos.
* \retval -1 The given granule position was invalid (i.e. negative).*/
extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos);
/**Converts a granule position to an absolute time in seconds.
* The granule position is interpreted in the context of a given
* #th_enc_ctx or #th_dec_ctx handle (either will suffice).
* The handle is taken as a <tt>void *</tt> so that either context type can be
* passed.
* \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx
* handle.
* \param _granpos The granule position to convert.
* \return The absolute time in seconds corresponding to \a _granpos.
* This is the "end time" for the frame, or the latest time it should
* be displayed.
* It is not the presentation time.
* \retval -1 The given granule position was invalid (i.e. negative).*/
extern double th_granule_time(void *_encdec,ogg_int64_t _granpos);
/**Determines whether a Theora packet is a header or not.
* This function does no verification beyond checking the packet type bit, so
* it should not be used for bitstream identification; use
* th_decode_headerin() for that.
* As per the Theora specification, an empty (0-byte) packet is treated as a
* data packet (a delta frame with no coded blocks).
* \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
* \retval 1 The packet is a header packet.
* \retval 0 The packet is a video data packet.*/
extern int th_packet_isheader(ogg_packet *_op);
/**Determines whether a Theora packet is a key frame or not.
* This function does no verification beyond checking the packet type and
* key frame bits, so it should not be used for bitstream identification; use
* th_decode_headerin() for that.
* As per the Theora specification, an empty (0-byte) packet is treated as a
* delta frame (with no coded blocks).
* \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
* \retval 1 The packet contains a key frame.
* \retval 0 The packet contains a delta frame.
* \retval -1 The packet is not a video data packet.*/
extern int th_packet_iskeyframe(ogg_packet *_op);
/*@}*/
/**\name Functions for manipulating header data*/
/*@{*/
/**Initializes a #th_info structure.
* This should be called on a freshly allocated #th_info structure before
* attempting to use it.
* Among other things, this sets th_info#keyframe_granule_shift to its default
* value.
* \param _info The #th_info struct to initialize.
* \see th_info_clear()*/
extern void th_info_init(th_info *_info);
/**Clears a #th_info structure.
* This should be called on a #th_info structure after it is no longer
* needed.
* \param _info The #th_info struct to clear.
* \see th_info_init()*/
extern void th_info_clear(th_info *_info);
/**Initializes a #th_comment structure.
* This should be called on a freshly allocated #th_comment structure
* before attempting to use it.
* \param _tc The #th_comment struct to initialize.
* \see th_comment_clear()*/
extern void th_comment_init(th_comment *_tc);
/**Adds a comment, given as a single "TAG=value" string, to an initialized
* #th_comment structure.
* \note Neither th_comment_add() nor th_comment_add_tag() support
* comments containing null values, although the bitstream format does
* support them.
* To add such comments you will need to manipulate the #th_comment
* structure directly.
* \param _tc The #th_comment struct to add the comment to.
* \param _comment Must be a null-terminated UTF-8 string containing the
* comment in "TAG=the value" form.
* \see th_comment_add_tag()*/
extern void th_comment_add(th_comment *_tc, char *_comment);
/**Adds a comment, given as a separate tag and value, to an initialized
* #th_comment structure.
* \note Neither th_comment_add() nor th_comment_add_tag() support
* comments containing null values, although the bitstream format does
* support them.
* To add such comments you will need to manipulate the #th_comment
* structure directly.
* \param _tc The #th_comment struct to add the comment to.
* \param _tag A null-terminated string containing the tag associated with
* the comment.
* \param _val The corresponding value as a null-terminated string.
* \see th_comment_add()*/
extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val);
/**Looks up a comment value by its tag.
* \param _tc An initialized #th_comment structure.
* \param _tag The tag to look up.
* \param _count The instance of the tag.
* The same tag can appear multiple times, each with a distinct
* value, so an index is required to retrieve them all.
* The order in which these values appear is significant and
* should be preserved.
* Use th_comment_query_count() to get the legal range for
* the \a _count parameter.
* \return A pointer to the queried tag's value.
* This points directly to data in the #th_comment structure.
* It should not be modified or freed by the application, and
* modifications to the structure may invalidate the pointer.
* \retval NULL If no matching tag is found.*/
extern char *th_comment_query(th_comment *_tc,char *_tag,int _count);
/**Looks up the number of instances of a tag.
* Call this first when querying for a specific tag and then iterate over the
* number of instances with separate calls to th_comment_query() to
* retrieve all the values for that tag in order.
* \param _tc An initialized #th_comment structure.
* \param _tag The tag to look up.
* \return The number of instances of this particular tag.*/
extern int th_comment_query_count(th_comment *_tc,char *_tag);
/**Clears a #th_comment structure.
* This should be called on a #th_comment structure after it is no longer
* needed.
* It will free all memory used by the structure members.
* \param _tc The #th_comment struct to clear.
* \see th_comment_init()*/
extern void th_comment_clear(th_comment *_tc);
/*@}*/
/*@}*/
#if defined(__cplusplus)
}
#endif
#endif

View File

@ -5,13 +5,13 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: theora.h,v 1.1 2004/02/24 13:50:14 shatty Exp $
last mod: $Id$
********************************************************************/
@ -19,52 +19,205 @@
#define _O_THEORA_H_
#ifdef __cplusplus
extern "C" {
#endif
extern "C"
{
#endif /* __cplusplus */
#include <stddef.h> /* for size_t */
#ifndef LIBOGG2
#include <ogg/ogg.h>
#else
#include <ogg2/ogg.h>
/* This is temporary until libogg2 is more complete */
ogg_buffer_state *ogg_buffer_create(void);
#endif
/** \file
* The libtheora pre-1.0 legacy C API.
*
* \ingroup oldfuncs
*
* \section intro Introduction
*
* This is the documentation for the libtheora legacy C API, declared in
* the theora.h header, which describes the old interface used before
* the 1.0 release. This API was widely deployed for several years and
* remains supported, but for new code we recommend the cleaner API
* declared in theoradec.h and theoraenc.h.
*
* libtheora is the reference implementation for
* <a href="http://www.theora.org/">Theora</a>, a free video codec.
* Theora is derived from On2's VP3 codec with improved integration with
* Ogg multimedia formats by <a href="http://www.xiph.org/">Xiph.Org</a>.
*
* \section overview Overview
*
* This library will both decode and encode theora packets to/from raw YUV
* frames. In either case, the packets will most likely either come from or
* need to be embedded in an Ogg stream. Use
* <a href="http://xiph.org/ogg/">libogg</a> or
* <a href="http://www.annodex.net/software/liboggz/index.html">liboggz</a>
* to extract/package these packets.
*
* \section decoding Decoding Process
*
* Decoding can be separated into the following steps:
* -# initialise theora_info and theora_comment structures using
* theora_info_init() and theora_comment_init():
\verbatim
theora_info info;
theora_comment comment;
theora_info_init(&info);
theora_comment_init(&comment);
\endverbatim
* -# retrieve header packets from Ogg stream (there should be 3) and decode
* into theora_info and theora_comment structures using
* theora_decode_header(). See \ref identification for more information on
* identifying which packets are theora packets.
\verbatim
int i;
for (i = 0; i < 3; i++)
{
(get a theora packet "op" from the Ogg stream)
theora_decode_header(&info, &comment, op);
}
\endverbatim
* -# initialise the decoder based on the information retrieved into the
* theora_info struct by theora_decode_header(). You will need a
* theora_state struct.
\verbatim
theora_state state;
theora_decode_init(&state, &info);
\endverbatim
* -# pass in packets and retrieve decoded frames! See the yuv_buffer
* documentation for information on how to retrieve raw YUV data.
\verbatim
yuv_buffer buffer;
while (last packet was not e_o_s) {
(get a theora packet "op" from the Ogg stream)
theora_decode_packetin(&state, op);
theora_decode_YUVout(&state, &buffer);
}
\endverbatim
*
*
* \subsection identification Identifying Theora Packets
*
* All streams inside an Ogg file have a unique serial_no attached to the
* stream. Typically, you will want to
* - retrieve the serial_no for each b_o_s (beginning of stream) page
* encountered within the Ogg file;
* - test the first (only) packet on that page to determine if it is a theora
* packet;
* - once you have found a theora b_o_s page then use the retrieved serial_no
* to identify future packets belonging to the same theora stream.
*
* Note that you \e cannot use theora_packet_isheader() to determine if a
* packet is a theora packet or not, as this function does not perform any
* checking beyond whether a header bit is present. Instead, use the
* theora_decode_header() function and check the return value; or examine the
* header bytes at the beginning of the Ogg page.
*/
/** \defgroup oldfuncs Legacy pre-1.0 C API */
/* @{ */
/**
* A YUV buffer for passing uncompressed frames to and from the codec.
* This holds a Y'CbCr frame in planar format. The CbCr planes can be
* subsampled and have their own separate dimensions and row stride
* offsets. Note that the strides may be negative in some
* configurations. For theora the width and height of the largest plane
* must be a multiple of 16. The actual meaningful picture size and
* offset are stored in the theora_info structure; frames returned by
* the decoder may need to be cropped for display.
*
* All samples are 8 bits. Within each plane samples are ordered by
* row from the top of the frame to the bottom. Within each row samples
* are ordered from left to right.
*
* During decode, the yuv_buffer struct is allocated by the user, but all
* fields (including luma and chroma pointers) are filled by the library.
* These pointers address library-internal memory and their contents should
* not be modified.
*
* Conversely, during encode the user allocates the struct and fills out all
* fields. The user also manages the data addressed by the luma and chroma
* pointers. See the encoder_example.c and dump_video.c example files in
* theora/examples/ for more information.
*/
typedef struct {
int y_width;
int y_height;
int y_stride;
int y_width; /**< Width of the Y' luminance plane */
int y_height; /**< Height of the luminance plane */
int y_stride; /**< Offset in bytes between successive rows */
int uv_width;
int uv_height;
int uv_stride;
char *y;
char *u;
char *v;
int uv_width; /**< Width of the Cb and Cr chroma planes */
int uv_height; /**< Height of the chroma planes */
int uv_stride; /**< Offset between successive chroma rows */
unsigned char *y; /**< Pointer to start of luminance data */
unsigned char *u; /**< Pointer to start of Cb data */
unsigned char *v; /**< Pointer to start of Cr data */
} yuv_buffer;
/**
* A Colorspace.
*/
typedef enum {
OC_CS_UNSPECIFIED,
OC_CS_ITU_REC_470M,
OC_CS_ITU_REC_470BG,
OC_CS_UNSPECIFIED, /**< The colorspace is unknown or unspecified */
OC_CS_ITU_REC_470M, /**< This is the best option for 'NTSC' content */
OC_CS_ITU_REC_470BG, /**< This is the best option for 'PAL' content */
OC_CS_NSPACES /**< This marks the end of the defined colorspaces */
} theora_colorspace;
/**
* A chroma subsampling mode.
*
* These enumerate the available chroma subsampling options supported
* by the theora format. See Section 4.4 of the specification for
* exact definitions.
*/
typedef enum {
OC_PF_420, /**< Chroma subsampling by 2 in each direction (4:2:0) */
OC_PF_RSVD, /**< Reserved value */
OC_PF_422, /**< Horizontal chroma subsampling by 2 (4:2:2) */
OC_PF_444, /**< No chroma subsampling at all (4:4:4) */
} theora_pixelformat;
/**
* Theora bitstream info.
* Contains the basic playback parameters for a stream,
* corresponding to the initial 'info' header packet.
*
* Encoded theora frames must be a multiple of 16 in width and height.
* To handle other frame sizes, a crop rectangle is specified in
* frame_height and frame_width, offset_x and offset_y. The offset
* and size should still be a multiple of 2 to avoid chroma sampling
* shifts. Offset values in this structure are measured from the
* upper left of the image.
*
* Frame rate, in frames per second, is stored as a rational
* fraction. Aspect ratio is also stored as a rational fraction, and
* refers to the aspect ratio of the frame pixels, not of the
* overall frame itself.
*
* See <a href="http://svn.xiph.org/trunk/theora/examples/encoder_example.c">
* examples/encoder_example.c</a> for usage examples of the
* other parameters and good default settings for the encoder parameters.
*/
typedef struct {
ogg_uint32_t width;
ogg_uint32_t height;
ogg_uint32_t frame_width;
ogg_uint32_t frame_height;
ogg_uint32_t offset_x;
ogg_uint32_t offset_y;
ogg_uint32_t fps_numerator;
ogg_uint32_t fps_denominator;
ogg_uint32_t aspect_numerator;
ogg_uint32_t aspect_denominator;
theora_colorspace colorspace;
int target_bitrate;
int quality;
int quick_p; /* quick encode/decode */
ogg_uint32_t width; /**< encoded frame width */
ogg_uint32_t height; /**< encoded frame height */
ogg_uint32_t frame_width; /**< display frame width */
ogg_uint32_t frame_height; /**< display frame height */
ogg_uint32_t offset_x; /**< horizontal offset of the displayed frame */
ogg_uint32_t offset_y; /**< vertical offset of the displayed frame */
ogg_uint32_t fps_numerator; /**< frame rate numerator **/
ogg_uint32_t fps_denominator; /**< frame rate denominator **/
ogg_uint32_t aspect_numerator; /**< pixel aspect ratio numerator */
ogg_uint32_t aspect_denominator; /**< pixel aspect ratio denominator */
theora_colorspace colorspace; /**< colorspace */
int target_bitrate; /**< nominal bitrate in bits per second */
int quality; /**< Nominal quality setting, 0-63 */
int quick_p; /**< Quick encode/decode */
/* decode only */
unsigned char version_major;
@ -85,8 +238,12 @@ typedef struct {
ogg_int32_t noise_sensitivity;
ogg_int32_t sharpness;
theora_pixelformat pixelformat; /**< chroma subsampling mode to expect */
} theora_info;
/** Codec internal state and context.
*/
typedef struct{
theora_info *i;
ogg_int64_t granulepos;
@ -96,53 +253,532 @@ typedef struct{
} theora_state;
/**
* Comment header metadata.
*
* This structure holds the in-stream metadata corresponding to
* the 'comment' header packet.
*
* Meta data is stored as a series of (tag, value) pairs, in
* length-encoded string vectors. The first occurrence of the
* '=' character delimits the tag and value. A particular tag
* may occur more than once. The character set encoding for
* the strings is always UTF-8, but the tag names are limited
* to case-insensitive ASCII. See the spec for details.
*
* In filling in this structure, theora_decode_header() will
* null-terminate the user_comment strings for safety. However,
* the bitstream format itself treats them as 8-bit clean,
* and so the length array should be treated as authoritative
* for their length.
*/
typedef struct theora_comment{
char **user_comments;
int *comment_lengths;
int comments;
char *vendor;
char **user_comments; /**< An array of comment string vectors */
int *comment_lengths; /**< An array of corresponding string vector lengths in bytes */
int comments; /**< The total number of comment string vectors */
char *vendor; /**< The vendor string identifying the encoder, null terminated */
} theora_comment;
#define OC_FAULT -1
#define OC_EINVAL -10
#define OC_BADHEADER -20
#define OC_NOTFORMAT -21
#define OC_VERSION -22
#define OC_IMPL -23
#define OC_BADPACKET -24
#define OC_NEWPACKET -25
/**\name theora_control() codes */
/* \anchor decctlcodes_old
* These are the available request codes for theora_control()
* when called with a decoder instance.
* By convention decoder control codes are odd, to distinguish
* them from \ref encctlcodes_old "encoder control codes" which
* are even.
*
* Note that since the 1.0 release, both the legacy and the final
* implementation accept all the same control codes, but only the
* final API declares the newer codes.
*
* Keep any experimental or vendor-specific values above \c 0x8000.*/
/*@{*/
/**Get the maximum post-processing level.
* The decoder supports a post-processing filter that can improve
* the appearance of the decoded images. This returns the highest
* level setting for this post-processor, corresponding to maximum
* improvement and computational expense.
*/
#define TH_DECCTL_GET_PPLEVEL_MAX (1)
/**Set the post-processing level.
* Sets the level of post-processing to use when decoding the
* compressed stream. This must be a value between zero (off)
* and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
*/
#define TH_DECCTL_SET_PPLEVEL (3)
/**Sets the maximum distance between key frames.
* This can be changed during an encode, but will be bounded by
* <tt>1<<th_info#keyframe_granule_shift</tt>.
* If it is set before encoding begins, th_info#keyframe_granule_shift will
* be enlarged appropriately.
*
* \param[in] buf <tt>ogg_uint32_t</tt>: The maximum distance between key
* frames.
* \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
* \retval OC_FAULT \a theora_state or \a buf is <tt>NULL</tt>.
* \retval OC_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
* \retval OC_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
/**Set the granule position.
* Call this after a seek, to update the internal granulepos
* in the decoder, to ensure that subsequent frames are marked
* properly. If you track timestamps yourself and do not use
* the granule position returned by the decoder, then you do
* not need to use this control.
*/
#define TH_DECCTL_SET_GRANPOS (5)
/**\anchor encctlcodes_old */
/**Sets the quantization parameters to use.
* The parameters are copied, not stored by reference, so they can be freed
* after this call.
* <tt>NULL</tt> may be specified to revert to the default parameters.
*
* \param[in] buf #th_quant_info
* \retval OC_FAULT \a theora_state is <tt>NULL</tt>.
* \retval OC_EINVAL Encoding has already begun, the quantization parameters
* are not acceptable to this version of the encoder,
* \a buf is <tt>NULL</tt> and \a buf_sz is not zero,
* or \a buf is non-<tt>NULL</tt> and \a buf_sz is
* not <tt>sizeof(#th_quant_info)</tt>.
* \retval OC_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_QUANT_PARAMS (2)
/**Disables any encoder features that would prevent lossless transcoding back
* to VP3.
* This primarily means disabling block-level QI values and not using 4MV mode
* when any of the luma blocks in a macro block are not coded.
* It also includes using the VP3 quantization tables and Huffman codes; if you
* set them explicitly after calling this function, the resulting stream will
* not be VP3-compatible.
* If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
* material, or when using a picture region smaller than the full frame (e.g.
* a non-multiple-of-16 width or height), then non-VP3 bitstream features will
* still be disabled, but the stream will still not be VP3-compatible, as VP3
* was not capable of encoding such formats.
* If you call this after encoding has already begun, then the quantization
* tables and codebooks cannot be changed, but the frame-level features will
* be enabled or disabled as requested.
*
* \param[in] buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
* or 0 to disable it (the default).
* \param[out] buf <tt>int</tt>: 1 if all bitstream features required for
* VP3-compatibility could be set, and 0 otherwise.
* The latter will be returned if the pixel format is not
* 4:2:0, the picture region is smaller than the full frame,
* or if encoding has begun, preventing the quantization
* tables and codebooks from being set.
* \retval OC_FAULT \a theora_state or \a buf is <tt>NULL</tt>.
* \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
* \retval OC_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
/**Gets the maximum speed level.
* Higher speed levels favor quicker encoding over better quality per bit.
* Depending on the encoding mode, and the internal algorithms used, quality
* may actually improve, but in this case bitrate will also likely increase.
* In any case, overall rate/distortion performance will probably decrease.
* The maximum value, and the meaning of each value, may change depending on
* the current encoding mode (VBR vs. CQI, etc.).
*
* \param[out] buf int: The maximum encoding speed level.
* \retval OC_FAULT \a theora_state or \a buf is <tt>NULL</tt>.
* \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
* \retval OC_IMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
/**Sets the speed level.
* By default a speed value of 1 is used.
*
* \param[in] buf int: The new encoding speed level.
* 0 is slowest, larger values use less CPU.
* \retval OC_FAULT \a theora_state or \a buf is <tt>NULL</tt>.
* \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>, or the
* encoding speed level is out of bounds.
* The maximum encoding speed level may be
* implementation- and encoding mode-specific, and can be
* obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
* \retval OC_IMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_SPLEVEL (14)
/*@}*/
#define OC_FAULT -1 /**< General failure */
#define OC_EINVAL -10 /**< Library encountered invalid internal data */
#define OC_DISABLED -11 /**< Requested action is disabled */
#define OC_BADHEADER -20 /**< Header packet was corrupt/invalid */
#define OC_NOTFORMAT -21 /**< Packet is not a theora packet */
#define OC_VERSION -22 /**< Bitstream version is not handled */
#define OC_IMPL -23 /**< Feature or action not implemented */
#define OC_BADPACKET -24 /**< Packet is corrupt */
#define OC_NEWPACKET -25 /**< Packet is an (ignorable) unhandled extension */
#define OC_DUPFRAME 1 /**< Packet is a dropped frame */
/**
* Retrieve a human-readable string to identify the encoder vendor and version.
* \returns A version string.
*/
extern const char *theora_version_string(void);
/**
* Retrieve a 32-bit version number.
* This number is composed of a 16-bit major version, 8-bit minor version
* and 8 bit sub-version, composed as follows:
<pre>
(VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
</pre>
* \returns The version number.
*/
extern ogg_uint32_t theora_version_number(void);
/**
* Initialize the theora encoder.
* \param th The theora_state handle to initialize for encoding.
* \param ti A theora_info struct filled with the desired encoding parameters.
* \retval 0 Success
*/
extern int theora_encode_init(theora_state *th, theora_info *ti);
/**
* Submit a YUV buffer to the theora encoder.
* \param t A theora_state handle previously initialized for encoding.
* \param yuv A buffer of YUV data to encode. Note that both the yuv_buffer
* struct and the luma/chroma buffers within should be allocated by
* the user.
* \retval OC_EINVAL Encoder is not ready, or is finished.
* \retval -1 The size of the given frame differs from those previously input
* \retval 0 Success
*/
extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv);
/**
* Request the next packet of encoded video.
* The encoded data is placed in a user-provided ogg_packet structure.
* \param t A theora_state handle previously initialized for encoding.
* \param last_p whether this is the last packet the encoder should produce.
* \param op An ogg_packet structure to fill. libtheora will set all
* elements of this structure, including a pointer to encoded
* data. The memory for the encoded data is owned by libtheora.
* \retval 0 No internal storage exists OR no packet is ready
* \retval -1 The encoding process has completed
* \retval 1 Success
*/
extern int theora_encode_packetout( theora_state *t, int last_p,
ogg_packet *op);
/**
* Request a packet containing the initial header.
* A pointer to the header data is placed in a user-provided ogg_packet
* structure.
* \param t A theora_state handle previously initialized for encoding.
* \param op An ogg_packet structure to fill. libtheora will set all
* elements of this structure, including a pointer to the header
* data. The memory for the header data is owned by libtheora.
* \retval 0 Success
*/
extern int theora_encode_header(theora_state *t, ogg_packet *op);
/**
* Request a comment header packet from provided metadata.
* A pointer to the comment data is placed in a user-provided ogg_packet
* structure.
* \param tc A theora_comment structure filled with the desired metadata
* \param op An ogg_packet structure to fill. libtheora will set all
* elements of this structure, including a pointer to the encoded
* comment data. The memory for the comment data is owned by
* libtheora.
* \retval 0 Success
*/
extern int theora_encode_comment(theora_comment *tc, ogg_packet *op);
/**
* Request a packet containing the codebook tables for the stream.
* A pointer to the codebook data is placed in a user-provided ogg_packet
* structure.
* \param t A theora_state handle previously initialized for encoding.
* \param op An ogg_packet structure to fill. libtheora will set all
* elements of this structure, including a pointer to the codebook
* data. The memory for the header data is owned by libtheora.
* \retval 0 Success
*/
extern int theora_encode_tables(theora_state *t, ogg_packet *op);
/**
* Decode an Ogg packet, with the expectation that the packet contains
* an initial header, comment data or codebook tables.
*
* \param ci A theora_info structure to fill. This must have been previously
* initialized with theora_info_init(). If \a op contains an initial
* header, theora_decode_header() will fill \a ci with the
* parsed header values. If \a op contains codebook tables,
* theora_decode_header() will parse these and attach an internal
* representation to \a ci->codec_setup.
* \param cc A theora_comment structure to fill. If \a op contains comment
* data, theora_decode_header() will fill \a cc with the parsed
* comments.
* \param op An ogg_packet structure which you expect contains an initial
* header, comment data or codebook tables.
*
* \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet
* has the signature of an initial packet, but op is
* not a b_o_s packet; OR this packet has the signature
* of an initial header packet, but an initial header
* packet has already been seen; OR this packet has the
* signature of a comment packet, but the initial header
* has not yet been seen; OR this packet has the signature
* of a comment packet, but contains invalid data; OR
* this packet has the signature of codebook tables,
* but the initial header or comments have not yet
* been seen; OR this packet has the signature of codebook
* tables, but contains invalid data;
* OR the stream being decoded has a compatible version
* but this packet does not have the signature of a
* theora initial header, comments, or codebook packet
* \retval OC_VERSION The packet data of \a op is an initial header with
* a version which is incompatible with this version of
* libtheora.
* \retval OC_NEWPACKET the stream being decoded has an incompatible (future)
* version and contains an unknown signature.
* \retval 0 Success
*
* \note The normal usage is that theora_decode_header() be called on the
* first three packets of a theora logical bitstream in succession.
*/
extern int theora_decode_header(theora_info *ci, theora_comment *cc,
ogg_packet *op);
/**
* Initialize a theora_state handle for decoding.
* \param th The theora_state handle to initialize.
* \param c A theora_info struct filled with the desired decoding parameters.
* This is of course usually obtained from a previous call to
* theora_decode_header().
* \retval 0 Success
*/
extern int theora_decode_init(theora_state *th, theora_info *c);
/**
* Input a packet containing encoded data into the theora decoder.
* \param th A theora_state handle previously initialized for decoding.
* \param op An ogg_packet containing encoded theora data.
* \retval 0 Success
* \retval OC_BADPACKET \a op does not contain encoded video data
*/
extern int theora_decode_packetin(theora_state *th,ogg_packet *op);
/**
* Output the next available frame of decoded YUV data.
* \param th A theora_state handle previously initialized for decoding.
* \param yuv A yuv_buffer in which libtheora should place the decoded data.
* Note that the buffer struct itself is allocated by the user, but
* that the luma and chroma pointers will be filled in by the
* library. Also note that these luma and chroma regions should be
* considered read-only by the user.
* \retval 0 Success
*/
extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv);
/**
* Report whether a theora packet is a header or not
* This function does no verification beyond checking the header
* flag bit so it should not be used for bitstream identification;
* use theora_decode_header() for that.
*
* \param op An ogg_packet containing encoded theora data.
* \retval 1 The packet is a header packet
* \retval 0 The packet is not a header packet (and so contains frame data)
*
* This function was added in the 1.0alpha4 release.
*/
extern int theora_packet_isheader(ogg_packet *op);
/**
* Report whether a theora packet is a keyframe or not
*
* \param op An ogg_packet containing encoded theora data.
* \retval 1 The packet contains a keyframe image
* \retval 0 The packet contains an interframe delta
* \retval -1 The packet is not an image data packet at all
*
* This function was added in the 1.0alpha4 release.
*/
extern int theora_packet_iskeyframe(ogg_packet *op);
/**
* Report the granulepos shift radix
*
* When embedded in Ogg, Theora uses a two-part granulepos,
* splitting the 64-bit field into two pieces. The more-significant
* section represents the frame count at the last keyframe,
* and the less-significant section represents the count of
* frames since the last keyframe. In this way the overall
* field is still non-decreasing with time, but usefully encodes
* a pointer to the last keyframe, which is necessary for
* correctly restarting decode after a seek.
*
* This function reports the number of bits used to represent
* the distance to the last keyframe, and thus how the granulepos
* field must be shifted or masked to obtain the two parts.
*
* Since libtheora returns compressed data in an ogg_packet
* structure, this may be generally useful even if the Theora
* packets are not being used in an Ogg container.
*
* \param ti A previously initialized theora_info struct
* \returns The bit shift dividing the two granulepos fields
*
* This function was added in the 1.0alpha5 release.
*/
int theora_granule_shift(theora_info *ti);
/**
* Convert a granulepos to an absolute frame index, starting at 0.
* The granulepos is interpreted in the context of a given theora_state handle.
*
* Note that while the granulepos encodes the frame count (i.e. starting
* from 1) this call returns the frame index, starting from zero. Thus
* one can calculate the presentation time by multiplying the index by
* the rate.
*
* \param th A previously initialized theora_state handle (encode or decode)
* \param granulepos The granulepos to convert.
* \returns The frame index corresponding to \a granulepos.
* \retval -1 The given granulepos is undefined (i.e. negative)
*
* This function was added in the 1.0alpha4 release.
*/
extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos);
/**
* Convert a granulepos to absolute time in seconds. The granulepos is
* interpreted in the context of a given theora_state handle, and gives
* the end time of a frame's presentation as used in Ogg mux ordering.
*
* \param th A previously initialized theora_state handle (encode or decode)
* \param granulepos The granulepos to convert.
* \returns The absolute time in seconds corresponding to \a granulepos.
* This is the "end time" for the frame, or the latest time it should
* be displayed.
* It is not the presentation time.
* \retval -1. The given granulepos is undefined (i.e. negative), or
* the function has been disabled because floating
* point support is not available.
*/
extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos);
/**
* Initialize a theora_info structure. All values within the given theora_info
* structure are initialized, and space is allocated within libtheora for
* internal codec setup data.
* \param c A theora_info struct to initialize.
*/
extern void theora_info_init(theora_info *c);
/**
* Clear a theora_info structure. All values within the given theora_info
* structure are cleared, and associated internal codec setup data is freed.
* \param c A theora_info struct to clear.
*/
extern void theora_info_clear(theora_info *c);
/**
* Free all internal data associated with a theora_state handle.
* \param t A theora_state handle.
*/
extern void theora_clear(theora_state *t);
/**
* Initialize an allocated theora_comment structure
* \param tc An allocated theora_comment structure
**/
extern void theora_comment_init(theora_comment *tc);
/**
* Add a comment to an initialized theora_comment structure
* \param tc A previously initialized theora comment structure
* \param comment A null-terminated string encoding the comment in the form
* "TAG=the value"
*
* Neither theora_comment_add() nor theora_comment_add_tag() support
* comments containing null values, although the bitstream format
* supports this. To add such comments you will need to manipulate
* the theora_comment structure directly.
**/
extern void theora_comment_add(theora_comment *tc, char *comment);
/**
* Add a comment to an initialized theora_comment structure.
* \param tc A previously initialized theora comment structure
* \param tag A null-terminated string containing the tag
* associated with the comment.
* \param value The corresponding value as a null-terminated string
*
* Neither theora_comment_add() nor theora_comment_add_tag() support
* comments containing null values, although the bitstream format
* supports this. To add such comments you will need to manipulate
* the theora_comment structure directly.
**/
extern void theora_comment_add_tag(theora_comment *tc,
char *tag, char *value);
/**
* Look up a comment value by tag.
* \param tc An initialized theora_comment structure
* \param tag The tag to look up
* \param count The instance of the tag. The same tag can appear multiple
* times, each with a distinct and ordered value, so an index
* is required to retrieve them all.
* \returns A pointer to the queried tag's value
* \retval NULL No matching tag is found
*
* \note Use theora_comment_query_count() to get the legal range for the
* count parameter.
**/
extern char *theora_comment_query(theora_comment *tc, char *tag, int count);
/** Look up the number of instances of a tag.
* \param tc An initialized theora_comment structure
* \param tag The tag to look up
* \returns The number of instances of a particular tag.
*
* Call this first when querying for a specific tag and then iterate
* over the number of instances with separate calls to
* theora_comment_query() to retrieve all instances in order.
**/
extern int theora_comment_query_count(theora_comment *tc, char *tag);
/**
* Clear an allocated theora_comment struct so that it can be freed.
* \param tc An allocated theora_comment structure.
**/
extern void theora_comment_clear(theora_comment *tc);
/**Encoder control function.
* This is used to provide advanced control of the encoding process.
* \param th A #theora_state handle.
* \param req The control code to process.
* See \ref encctlcodes_old "the list of available
* control codes" for details.
* \param buf The parameters for this control code.
* \param buf_sz The size of the parameter buffer.*/
extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);
/* @} */ /* end oldfuncs doxygen group */
#ifdef __cplusplus
}
#endif
#endif /* __cplusplus */
#endif /* _O_THEORA_H_ */

View File

@ -0,0 +1,325 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
********************************************************************/
/**\file
* The <tt>libtheoradec</tt> C decoding API.*/
#if !defined(_O_THEORA_THEORADEC_H_)
# define _O_THEORA_THEORADEC_H_ (1)
# include <stddef.h>
# include <ogg/ogg.h>
# include "codec.h"
#if defined(__cplusplus)
extern "C" {
#endif
/**\name th_decode_ctl() codes
* \anchor decctlcodes
* These are the available request codes for th_decode_ctl().
* By convention, these are odd, to distinguish them from the
* \ref encctlcodes "encoder control codes".
* Keep any experimental or vendor-specific values above \c 0x8000.*/
/*@{*/
/**Gets the maximum post-processing level.
* The decoder supports a post-processing filter that can improve
* the appearance of the decoded images. This returns the highest
* level setting for this post-processor, corresponding to maximum
* improvement and computational expense.
*
* \param[out] _buf int: The maximum post-processing level.
* \retval TH_EFAULT \a _dec_ctx or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_DECCTL_GET_PPLEVEL_MAX (1)
/**Sets the post-processing level.
* By default, post-processing is disabled.
*
* Sets the level of post-processing to use when decoding the
* compressed stream. This must be a value between zero (off)
* and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
*
* \param[in] _buf int: The new post-processing level.
* 0 to disable; larger values use more CPU.
* \retval TH_EFAULT \a _dec_ctx or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
* post-processing level is out of bounds.
* The maximum post-processing level may be
* implementation-specific, and can be obtained via
* #TH_DECCTL_GET_PPLEVEL_MAX.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_DECCTL_SET_PPLEVEL (3)
/**Sets the granule position.
* Call this after a seek, before decoding the first frame, to ensure that the
* proper granule position is returned for all subsequent frames.
* If you track timestamps yourself and do not use the granule position
* returned by the decoder, then you need not call this function.
*
* \param[in] _buf <tt>ogg_int64_t</tt>: The granule position of the next
* frame.
* \retval TH_EFAULT \a _dec_ctx or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(ogg_int64_t)</tt>, or the
* granule position is negative.*/
#define TH_DECCTL_SET_GRANPOS (5)
/**Sets the striped decode callback function.
* If set, this function will be called as each piece of a frame is fully
* decoded in th_decode_packetin().
* You can pass in a #th_stripe_callback with
* th_stripe_callback#stripe_decoded set to <tt>NULL</tt> to disable the
* callbacks at any point.
* Enabling striped decode does not prevent you from calling
* th_decode_ycbcr_out() after the frame is fully decoded.
*
* \param[in] _buf #th_stripe_callback: The callback parameters.
* \retval TH_EFAULT \a _dec_ctx or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not
* <tt>sizeof(th_stripe_callback)</tt>.*/
#define TH_DECCTL_SET_STRIPE_CB (7)
/**Enables telemetry and sets the macroblock display mode */
#define TH_DECCTL_SET_TELEMETRY_MBMODE (9)
/**Enables telemetry and sets the motion vector display mode */
#define TH_DECCTL_SET_TELEMETRY_MV (11)
/**Enables telemetry and sets the adaptive quantization display mode */
#define TH_DECCTL_SET_TELEMETRY_QI (13)
/**Enables telemetry and sets the bitstream breakdown visualization mode */
#define TH_DECCTL_SET_TELEMETRY_BITS (15)
/*@}*/
/**A callback function for striped decode.
* This is a function pointer to an application-provided function that will be
* called each time a section of the image is fully decoded in
* th_decode_packetin().
* This allows the application to process the section immediately, while it is
* still in cache.
* Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily
* decrease with each call until it reaches 0, at which point the full frame
* is decoded.
* The number of fragment rows made available in each call depends on the pixel
* format and the number of post-processing filters enabled, and may not even
* be constant for the entire frame.
* If a non-<tt>NULL</tt> \a _granpos pointer is passed to
* th_decode_packetin(), the granule position for the frame will be stored
* in it before the first callback is made.
* If an entire frame is dropped (a 0-byte packet), then no callbacks will be
* made at all for that frame.
* \param _ctx An application-provided context pointer.
* \param _buf The image buffer for the decoded frame.
* \param _yfrag0 The Y coordinate of the first row of 8x8 fragments
* decoded.
* Multiply this by 8 to obtain the pixel row number in the
* luma plane.
* If the chroma planes are subsampled in the Y direction,
* this will always be divisible by two.
* \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past
* the newly decoded section.
* If the chroma planes are subsampled in the Y direction,
* this will always be divisible by two.
* I.e., this section contains fragment rows
* <tt>\a _yfrag0 ...\a _yfrag_end -1</tt>.*/
typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf,
int _yfrag0,int _yfrag_end);
/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/
typedef struct{
/**An application-provided context pointer.
* This will be passed back verbatim to the application.*/
void *ctx;
/**The callback function pointer.*/
th_stripe_decoded_func stripe_decoded;
}th_stripe_callback;
/**\name Decoder state
The following data structures are opaque, and their contents are not
publicly defined by this API.
Referring to their internals directly is unsupported, and may break without
warning.*/
/*@{*/
/**The decoder context.*/
typedef struct th_dec_ctx th_dec_ctx;
/**Setup information.
This contains auxiliary information (Huffman tables and quantization
parameters) decoded from the setup header by th_decode_headerin() to be
passed to th_decode_alloc().
It can be re-used to initialize any number of decoders, and can be freed
via th_setup_free() at any time.*/
typedef struct th_setup_info th_setup_info;
/*@}*/
/**\defgroup decfuncs Functions for Decoding*/
/*@{*/
/**\name Functions for decoding
* You must link to <tt>libtheoradec</tt> if you use any of the
* functions in this section.
*
* The functions are listed in the order they are used in a typical decode.
* The basic steps are:
* - Parse the header packets by repeatedly calling th_decode_headerin().
* - Allocate a #th_dec_ctx handle with th_decode_alloc().
* - Call th_setup_free() to free any memory used for codec setup
* information.
* - Perform any additional decoder configuration with th_decode_ctl().
* - For each video data packet:
* - Submit the packet to the decoder via th_decode_packetin().
* - Retrieve the uncompressed video data via th_decode_ycbcr_out().
* - Call th_decode_free() to release all decoder memory.*/
/*@{*/
/**Decodes the header packets of a Theora stream.
* This should be called on the initial packets of the stream, in succession,
* until it returns <tt>0</tt>, indicating that all headers have been
* processed, or an error is encountered.
* At least three header packets are required, and additional optional header
* packets may follow.
* This can be used on the first packet of any logical stream to determine if
* that stream is a Theora stream.
* \param _info A #th_info structure to fill in.
* This must have been previously initialized with
* th_info_init().
* The application may immediately begin using the contents of
* this structure after the first header is decoded, though it
* must continue to be passed in on all subsequent calls.
* \param _tc A #th_comment structure to fill in.
* The application may immediately begin using the contents of
* this structure after the second header is decoded, though it
* must continue to be passed in on all subsequent calls.
* \param _setup Returns a pointer to additional, private setup information
* needed by the decoder.
* The contents of this pointer must be initialized to
* <tt>NULL</tt> on the first call, and the returned value must
* continue to be passed in on all subsequent calls.
* \param _op An <tt>ogg_packet</tt> structure which contains one of the
* initial packets of an Ogg logical stream.
* \return A positive value indicates that a Theora header was successfully
* processed.
* \retval 0 The first video data packet was encountered after all
* required header packets were parsed.
* The packet just passed in on this call should be saved
* and fed to th_decode_packetin() to begin decoding
* video data.
* \retval TH_EFAULT One of \a _info, \a _tc, or \a _setup was
* <tt>NULL</tt>.
* \retval TH_EBADHEADER \a _op was <tt>NULL</tt>, the packet was not the next
* header packet in the expected sequence, or the format
* of the header data was invalid.
* \retval TH_EVERSION The packet data was a Theora info header, but for a
* bitstream version not decodable with this version of
* <tt>libtheoradec</tt>.
* \retval TH_ENOTFORMAT The packet was not a Theora header.
*/
extern int th_decode_headerin(th_info *_info,th_comment *_tc,
th_setup_info **_setup,ogg_packet *_op);
/**Allocates a decoder instance.
*
* <b>Security Warning:</b> The Theora format supports very large frame sizes,
* potentially even larger than the address space of a 32-bit machine, and
* creating a decoder context allocates the space for several frames of data.
* If the allocation fails here, your program will crash, possibly at some
* future point because the OS kernel returned a valid memory range and will
* only fail when it tries to map the pages in it the first time they are
* used.
* Even if it succeeds, you may experience a denial of service if the frame
* size is large enough to cause excessive paging.
* If you are integrating libtheora in a larger application where such things
* are undesirable, it is highly recommended that you check the frame size in
* \a _info before calling this function and refuse to decode streams where it
* is larger than some reasonable maximum.
* libtheora will not check this for you, because there may be machines that
* can handle such streams and applications that wish to.
* \param _info A #th_info struct filled via th_decode_headerin().
* \param _setup A #th_setup_info handle returned via
* th_decode_headerin().
* \return The initialized #th_dec_ctx handle.
* \retval NULL If the decoding parameters were invalid.*/
extern th_dec_ctx *th_decode_alloc(const th_info *_info,
const th_setup_info *_setup);
/**Releases all storage used for the decoder setup information.
* This should be called after you no longer want to create any decoders for
* a stream whose headers you have parsed with th_decode_headerin().
* \param _setup The setup information to free.
* This can safely be <tt>NULL</tt>.*/
extern void th_setup_free(th_setup_info *_setup);
/**Decoder control function.
* This is used to provide advanced control of the decoding process.
* \param _dec A #th_dec_ctx handle.
* \param _req The control code to process.
* See \ref decctlcodes "the list of available control codes"
* for details.
* \param _buf The parameters for this control code.
* \param _buf_sz The size of the parameter buffer.*/
extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
size_t _buf_sz);
/**Submits a packet containing encoded video data to the decoder.
* \param _dec A #th_dec_ctx handle.
* \param _op An <tt>ogg_packet</tt> containing encoded video data.
* \param _granpos Returns the granule position of the decoded packet.
* If non-<tt>NULL</tt>, the granule position for this specific
* packet is stored in this location.
* This is computed incrementally from previously decoded
* packets.
* After a seek, the correct granule position must be set via
* #TH_DECCTL_SET_GRANPOS for this to work properly.
* \retval 0 Success.
* A new decoded frame can be retrieved by calling
* th_decode_ycbcr_out().
* \retval TH_DUPFRAME The packet represented a dropped (0-byte) frame.
* The player can skip the call to th_decode_ycbcr_out(),
* as the contents of the decoded frame buffer have not
* changed.
* \retval TH_EFAULT \a _dec or \a _op was <tt>NULL</tt>.
* \retval TH_EBADPACKET \a _op does not contain encoded video data.
* \retval TH_EIMPL The video data uses bitstream features which this
* library does not support.*/
extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
ogg_int64_t *_granpos);
/**Outputs the next available frame of decoded Y'CbCr data.
* If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB,
* then the application does not need to call this function.
* \param _dec A #th_dec_ctx handle.
* \param _ycbcr A video buffer structure to fill in.
* <tt>libtheoradec</tt> will fill in all the members of this
* structure, including the pointers to the uncompressed video
* data.
* The memory for this video data is owned by
* <tt>libtheoradec</tt>.
* It may be freed or overwritten without notification when
* subsequent frames are decoded.
* \retval 0 Success
* \retval TH_EFAULT \a _dec or \a _ycbcr was <tt>NULL</tt>.
*/
extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
th_ycbcr_buffer _ycbcr);
/**Frees an allocated decoder instance.
* \param _dec A #th_dec_ctx handle.*/
extern void th_decode_free(th_dec_ctx *_dec);
/*@}*/
/*@}*/
#if defined(__cplusplus)
}
#endif
#endif

View File

@ -0,0 +1,486 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
********************************************************************/
/**\file
* The <tt>libtheoraenc</tt> C encoding API.*/
#if !defined(_O_THEORA_THEORAENC_H_)
# define _O_THEORA_THEORAENC_H_ (1)
# include <stddef.h>
# include <ogg/ogg.h>
# include "codec.h"
#if defined(__cplusplus)
extern "C" {
#endif
/**\name th_encode_ctl() codes
* \anchor encctlcodes
* These are the available request codes for th_encode_ctl().
* By convention, these are even, to distinguish them from the
* \ref decctlcodes "decoder control codes".
* Keep any experimental or vendor-specific values above \c 0x8000.*/
/*@{*/
/**Sets the Huffman tables to use.
* The tables are copied, not stored by reference, so they can be freed after
* this call.
* <tt>NULL</tt> may be specified to revert to the default tables.
*
* \param[in] _buf <tt>#th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS]</tt>
* \retval TH_EFAULT \a _enc_ctx is <tt>NULL</tt>.
* \retval TH_EINVAL Encoding has already begun or one or more of the given
* tables is not full or prefix-free, \a _buf is
* <tt>NULL</tt> and \a _buf_sz is not zero, or \a _buf is
* non-<tt>NULL</tt> and \a _buf_sz is not
* <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
/**Sets the quantization parameters to use.
* The parameters are copied, not stored by reference, so they can be freed
* after this call.
* <tt>NULL</tt> may be specified to revert to the default parameters.
*
* \param[in] _buf #th_quant_info
* \retval TH_EFAULT \a _enc is <tt>NULL</tt>.
* \retval TH_EINVAL Encoding has already begun, \a _buf is
* <tt>NULL</tt> and \a _buf_sz is not zero,
* or \a _buf is non-<tt>NULL</tt> and
* \a _buf_sz is not <tt>sizeof(#th_quant_info)</tt>.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_QUANT_PARAMS (2)
/**Sets the maximum distance between key frames.
* This can be changed during an encode, but will be bounded by
* <tt>1\<\<th_info#keyframe_granule_shift</tt>.
* If it is set before encoding begins, th_info#keyframe_granule_shift will
* be enlarged appropriately.
*
* \param[in] _buf <tt>ogg_uint32_t</tt>: The maximum distance between key
* frames.
* \param[out] _buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
/**Disables any encoder features that would prevent lossless transcoding back
* to VP3.
* This primarily means disabling block-adaptive quantization and always coding
* all four luma blocks in a macro block when 4MV is used.
* It also includes using the VP3 quantization tables and Huffman codes; if you
* set them explicitly after calling this function, the resulting stream will
* not be VP3-compatible.
* If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
* material, or when using a picture region smaller than the full frame (e.g.
* a non-multiple-of-16 width or height), then non-VP3 bitstream features will
* still be disabled, but the stream will still not be VP3-compatible, as VP3
* was not capable of encoding such formats.
* If you call this after encoding has already begun, then the quantization
* tables and codebooks cannot be changed, but the frame-level features will
* be enabled or disabled as requested.
*
* \param[in] _buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
* or 0 to disable it (the default).
* \param[out] _buf <tt>int</tt>: 1 if all bitstream features required for
* VP3-compatibility could be set, and 0 otherwise.
* The latter will be returned if the pixel format is not
* 4:2:0, the picture region is smaller than the full frame,
* or if encoding has begun, preventing the quantization
* tables and codebooks from being set.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
/**Gets the maximum speed level.
* Higher speed levels favor quicker encoding over better quality per bit.
* Depending on the encoding mode, and the internal algorithms used, quality
* may actually improve, but in this case bitrate will also likely increase.
* In any case, overall rate/distortion performance will probably decrease.
* The maximum value, and the meaning of each value, may change depending on
* the current encoding mode (VBR vs. constant quality, etc.).
*
* \param[out] _buf <tt>int</tt>: The maximum encoding speed level.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
* \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
/**Sets the speed level.
* The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL.
*
* \param[in] _buf <tt>int</tt>: The new encoding speed level.
* 0 is slowest, larger values use less CPU.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
* encoding speed level is out of bounds.
* The maximum encoding speed level may be
* implementation- and encoding mode-specific, and can be
* obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
* \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_SPLEVEL (14)
/**Gets the current speed level.
* The default speed level may vary according to encoder implementation, but if
* this control code is not supported (it returns #TH_EIMPL), the default may
* be assumed to be the slowest available speed (0).
* The maximum encoding speed level may be implementation- and encoding
* mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
*
* \param[out] _buf <tt>int</tt>: The current encoding speed level.
* 0 is slowest, larger values use less CPU.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
* \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_GET_SPLEVEL (16)
/**Sets the number of duplicates of the next frame to produce.
* Although libtheora can encode duplicate frames very cheaply, it costs some
* amount of CPU to detect them, and a run of duplicates cannot span a
* keyframe boundary.
* This control code tells the encoder to produce the specified number of extra
* duplicates of the next frame.
* This allows the encoder to make smarter keyframe placement decisions and
* rate control decisions, and reduces CPU usage as well, when compared to
* just submitting the same frame for encoding multiple times.
* This setting only applies to the next frame submitted for encoding.
* You MUST call th_encode_packetout() repeatedly until it returns 0, or the
* extra duplicate frames will be lost.
*
* \param[in] _buf <tt>int</tt>: The number of duplicates to produce.
* If this is negative or zero, no duplicates will be produced.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
* number of duplicates is greater than or equal to the
* maximum keyframe interval.
* In the latter case, NO duplicate frames will be produced.
* You must ensure that the maximum keyframe interval is set
* larger than the maximum number of duplicates you will
* ever wish to insert prior to encoding.
* \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_DUP_COUNT (18)
/**Modifies the default bitrate management behavior.
* Use to allow or disallow frame dropping, and to enable or disable capping
* bit reservoir overflows and underflows.
* See \ref ratectlflags "the list of available flags".
* The flags are set by default to
* <tt>#TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW</tt>.
*
* \param[in] _buf <tt>int</tt>: Any combination of
* \ref ratectlflags "the available flags":
* - #TH_RATECTL_DROP_FRAMES: Enable frame dropping.
* - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later
* use.
* - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls
* later.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt> or rate control
* is not enabled.
* \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_RATE_FLAGS (20)
/**Sets the size of the bitrate management bit reservoir as a function
* of number of frames.
* The reservoir size affects how quickly bitrate management reacts to
* instantaneous changes in the video complexity.
* Larger reservoirs react more slowly, and provide better overall quality, but
* require more buffering by a client, adding more latency to live streams.
* By default, libtheora sets the reservoir to the maximum distance between
* keyframes, subject to a minimum and maximum limit.
* This call may be used to increase or decrease the reservoir, increasing or
* decreasing the allowed temporary variance in bitrate.
* An implementation may impose some limits on the size of a reservoir it can
* handle, in which case the actual reservoir size may not be exactly what was
* requested.
* The actual value set will be returned.
*
* \param[in] _buf <tt>int</tt>: Requested size of the reservoir measured in
* frames.
* \param[out] _buf <tt>int</tt>: The actual size of the reservoir set.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or rate control
* is not enabled.
* The buffer has an implementation-defined minimum and
* maximum size, and the value in \a _buf will be adjusted
* to match the actual value set.
* \retval TH_EIMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_RATE_BUFFER (22)
/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics.
* Pass 1 mode must be enabled before the first frame is encoded, and a target
* bitrate must have already been specified to the encoder.
* Although this does not have to be the exact rate that will be used in the
* second pass, closer values may produce better results.
* The first call returns the size of the two-pass header data, along with some
* placeholder content, and sets the encoder into pass 1 mode implicitly.
* Then, a subsequent call must be made after each call to
* th_encode_ycbcr_in() to retrieve the metrics for that frame.
* An additional, final call must be made to retrieve the summary data,
* containing such information as the total number of frames, etc.
* This must be stored in place of the placeholder data that was returned
* in the first call, before the frame metrics data.
* All of this data must be presented back to the encoder during pass 2 using
* #TH_ENCCTL_2PASS_IN.
*
* \param[out] _buf <tt>char *</tt>: Returns a pointer to internal storage
* containing the two pass metrics data.
* This storage is only valid until the next call, or until the
* encoder context is freed, and must be copied by the
* application.
* \retval >=0 The number of bytes of metric data available in the
* returned buffer.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(char *)</tt>, no target
* bitrate has been set, or the first call was made after
* the first frame was submitted for encoding.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_2PASS_OUT (24)
/**Submits two-pass encoding metric data collected during the first encoding
* pass to the second pass.
* The first call must be made before the first frame is encoded, and a target
* bitrate must have already been specified to the encoder.
* It sets the encoder to pass 2 mode implicitly; this cannot be disabled.
* The encoder may require reading data from some or all of the frames in
* advance, depending on, e.g., the reservoir size used in the second pass.
* You must call this function repeatedly before each frame to provide data
* until either a) it fails to consume all of the data presented or b) all of
* the pass 1 data has been consumed.
* In the first case, you must save the remaining data to be presented after
* the next frame.
* You can call this function with a NULL argument to get an upper bound on
* the number of bytes that will be required before the next frame.
*
* When pass 2 is first enabled, the default bit reservoir is set to the entire
* file; this gives maximum flexibility but can lead to very high peak rates.
* You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER
* (e.g., to set it to the keyframe interval for non-live streaming), however,
* you may then need to provide more data before the next frame.
*
* \param[in] _buf <tt>char[]</tt>: A buffer containing the data returned by
* #TH_ENCCTL_2PASS_OUT in pass 1.
* You may pass <tt>NULL</tt> for \a _buf to return an upper
* bound on the number of additional bytes needed before the
* next frame.
* The summary data returned at the end of pass 1 must be at
* the head of the buffer on the first call with a
* non-<tt>NULL</tt> \a _buf, and the placeholder data
* returned at the start of pass 1 should be omitted.
* After each call you should advance this buffer by the number
* of bytes consumed.
* \retval >0 The number of bytes of metric data required/consumed.
* \retval 0 No more data is required before the next frame.
* \retval TH_EFAULT \a _enc is <tt>NULL</tt>.
* \retval TH_EINVAL No target bitrate has been set, or the first call was
* made after the first frame was submitted for
* encoding.
* \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible
* implementation of this library.
* \retval TH_EBADHEADER The data was invalid; this may be returned when
* attempting to read an aborted pass 1 file that still
* has the placeholder data in place of the summary
* data.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_2PASS_IN (26)
/**Sets the current encoding quality.
* This is only valid so long as no bitrate has been specified, either through
* the #th_info struct used to initialize the encoder or through
* #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future
* version).
* If it is set before the headers are emitted, the target quality encoded in
* them will be updated.
*
* \param[in] _buf <tt>int</tt>: The new target quality, in the range 0...63,
* inclusive.
* \retval 0 Success.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL A target bitrate has already been specified, or the
* quality index was not in the range 0...63.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_QUALITY (28)
/**Sets the current encoding bitrate.
* Once a bitrate is set, the encoder must use a rate-controlled mode for all
* future frames (this restriction may be relaxed in a future version).
* If it is set before the headers are emitted, the target bitrate encoded in
* them will be updated.
* Due to the buffer delay, the exact bitrate of each section of the encode is
* not guaranteed.
* The encoder may have already used more bits than allowed for the frames it
* has encoded, expecting to make them up in future frames, or it may have
* used fewer, holding the excess in reserve.
* The exact transition between the two bitrates is not well-defined by this
* API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS.
* After a number of frames equal to the buffer delay, one may expect further
* output to average at the target bitrate.
*
* \param[in] _buf <tt>long</tt>: The new target bitrate, in bits per second.
* \retval 0 Success.
* \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
* \retval TH_EINVAL The target bitrate was not positive.
* \retval TH_EIMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_BITRATE (30)
/*@}*/
/**\name TH_ENCCTL_SET_RATE_FLAGS flags
* \anchor ratectlflags
* These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/
/*@{*/
/**Drop frames to keep within bitrate buffer constraints.
* This can have a severe impact on quality, but is the only way to ensure that
* bitrate targets are met at low rates during sudden bursts of activity.
* This flag is part of the default flag set documented for
* #TH_ENCCTL_SET_RATE_FLAGS.*/
#define TH_RATECTL_DROP_FRAMES (0x1)
/**Ignore bitrate buffer overflows.
* If the encoder uses so few bits that the reservoir of available bits
* overflows, ignore the excess.
* The encoder will not try to use these extra bits in future frames.
* At high rates this may cause the result to be undersized, but allows a
* client to play the stream using a finite buffer; it should normally be
* enabled.
* This flag is part of the default flag set documented for
* #TH_ENCCTL_SET_RATE_FLAGS.*/
#define TH_RATECTL_CAP_OVERFLOW (0x2)
/**Ignore bitrate buffer underflows.
* If the encoder uses so many bits that the reservoir of available bits
* underflows, ignore the deficit.
* The encoder will not try to make up these extra bits in future frames.
* At low rates this may cause the result to be oversized; it should normally
* be disabled.
* This flag is not part of the default flag set documented for
* #TH_ENCCTL_SET_RATE_FLAGS.*/
#define TH_RATECTL_CAP_UNDERFLOW (0x4)
/*@}*/
/**The quantization parameters used by VP3.*/
extern const th_quant_info TH_VP31_QUANT_INFO;
/**The Huffman tables used by VP3.*/
extern const th_huff_code
TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
/**\name Encoder state
The following data structure is opaque, and its contents are not publicly
defined by this API.
Referring to its internals directly is unsupported, and may break without
warning.*/
/*@{*/
/**The encoder context.*/
typedef struct th_enc_ctx th_enc_ctx;
/*@}*/
/**\defgroup encfuncs Functions for Encoding*/
/*@{*/
/**\name Functions for encoding
* You must link to <tt>libtheoraenc</tt> and <tt>libtheoradec</tt>
* if you use any of the functions in this section.
*
* The functions are listed in the order they are used in a typical encode.
* The basic steps are:
* - Fill in a #th_info structure with details on the format of the video you
* wish to encode.
* - Allocate a #th_enc_ctx handle with th_encode_alloc().
* - Perform any additional encoder configuration required with
* th_encode_ctl().
* - Repeatedly call th_encode_flushheader() to retrieve all the header
* packets.
* - For each uncompressed frame:
* - Submit the uncompressed frame via th_encode_ycbcr_in()
* - Repeatedly call th_encode_packetout() to retrieve any video data packets
* that are ready.
* - Call th_encode_free() to release all encoder memory.*/
/*@{*/
/**Allocates an encoder instance.
* \param _info A #th_info struct filled with the desired encoding parameters.
* \return The initialized #th_enc_ctx handle.
* \retval NULL If the encoding parameters were invalid.*/
extern th_enc_ctx *th_encode_alloc(const th_info *_info);
/**Encoder control function.
* This is used to provide advanced control of the encoding process.
* \param _enc A #th_enc_ctx handle.
* \param _req The control code to process.
* See \ref encctlcodes "the list of available control codes"
* for details.
* \param _buf The parameters for this control code.
* \param _buf_sz The size of the parameter buffer.*/
extern int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz);
/**Outputs the next header packet.
* This should be called repeatedly after encoder initialization until it
* returns 0 in order to get all of the header packets, in order, before
* encoding actual video data.
* \param _enc A #th_enc_ctx handle.
* \param _comments The metadata to place in the comment header, when it is
* encoded.
* \param _op An <tt>ogg_packet</tt> structure to fill.
* All of the elements of this structure will be set,
* including a pointer to the header data.
* The memory for the header data is owned by
* <tt>libtheoraenc</tt>, and may be invalidated when the
* next encoder function is called.
* \return A positive value indicates that a header packet was successfully
* produced.
* \retval 0 No packet was produced, and no more header packets remain.
* \retval TH_EFAULT \a _enc, \a _comments, or \a _op was <tt>NULL</tt>.*/
extern int th_encode_flushheader(th_enc_ctx *_enc,
th_comment *_comments,ogg_packet *_op);
/**Submits an uncompressed frame to the encoder.
* \param _enc A #th_enc_ctx handle.
* \param _ycbcr A buffer of Y'CbCr data to encode.
* \retval 0 Success.
* \retval TH_EFAULT \a _enc or \a _ycbcr is <tt>NULL</tt>.
* \retval TH_EINVAL The buffer size does not match the frame size the encoder
* was initialized with, or encoding has already
* completed.*/
extern int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr);
/**Retrieves encoded video data packets.
* This should be called repeatedly after each frame is submitted to flush any
* encoded packets, until it returns 0.
* The encoder will not buffer these packets as subsequent frames are
* compressed, so a failure to do so will result in lost video data.
* \note Currently the encoder operates in a one-frame-in, one-packet-out
* manner.
* However, this may be changed in the future.
* \param _enc A #th_enc_ctx handle.
* \param _last Set this flag to a non-zero value if no more uncompressed
* frames will be submitted.
* This ensures that a proper EOS flag is set on the last packet.
* \param _op An <tt>ogg_packet</tt> structure to fill.
* All of the elements of this structure will be set, including a
* pointer to the video data.
* The memory for the video data is owned by
* <tt>libtheoraenc</tt>, and may be invalidated when the next
* encoder function is called.
* \return A positive value indicates that a video data packet was successfully
* produced.
* \retval 0 No packet was produced, and no more encoded video data
* remains.
* \retval TH_EFAULT \a _enc or \a _op was <tt>NULL</tt>.*/
extern int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op);
/**Frees an allocated encoder instance.
* \param _enc A #th_enc_ctx handle.*/
extern void th_encode_free(th_enc_ctx *_enc);
/*@}*/
/*@}*/
#if defined(__cplusplus)
}
#endif
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: toplevel_lookup.h,v 1.1 2004/02/24 13:50:13 shatty Exp $
********************************************************************/
#include "encoder_internal.h"
/* Weights 1..5, one initializer per KEY_FRAME_CONTEXT slot.
   NOTE(review): presumably applied to a history of prior key frames, with
    later slots weighted more heavily -- confirm against the code that reads
    this table.
   NOTE(review): this is a non-static definition inside a header, so including
    the header from more than one translation unit produces duplicate
    definitions at link time. */
ogg_uint32_t PriorKeyFrameWeight[KEY_FRAME_CONTEXT] = { 1,2,3,4,5 };
/* Data structures controlling addition of residue blocks */
/* Error threshold table with 64 initializers (one per Q_TABLE_SIZE entry,
    assuming Q_TABLE_SIZE is 64 -- TODO confirm).
   Values are non-increasing, from 750 at index 0 down to 130 at the last two
    indices.
   NOTE(review): presumably indexed by quantizer index -- verify against the
    users of this table. */
ogg_uint32_t ResidueErrorThresh[Q_TABLE_SIZE] = {
750, 700, 650, 600, 590, 580, 570, 560,
550, 540, 530, 520, 510, 500, 490, 480,
470, 460, 450, 440, 430, 420, 410, 400,
390, 380, 370, 360, 350, 340, 330, 320,
310, 300, 290, 280, 270, 260, 250, 245,
240, 235, 230, 225, 220, 215, 210, 205,
200, 195, 190, 185, 180, 175, 170, 165,
160, 155, 150, 145, 140, 135, 130, 130 };
/* Residue block factor table with 64 initializers: 3 for the first 32 entries
    and 2 for the remaining 32.
   NOTE(review): presumably indexed the same way as ResidueErrorThresh --
    verify against the users of this table. */
ogg_uint32_t ResidueBlockFactor[Q_TABLE_SIZE] = {
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2 };

View File

@ -0,0 +1,900 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $
********************************************************************/
#include <stddef.h>
#include "x86enc.h"
#if defined(OC_X86_ASM)
/*Computes the sum of absolute differences (SAD) between eight 8-byte rows of
   _src and the corresponding rows of _ref, using psadbw.
  The same row stride is applied to both pointers.
  _src:     Pointer to the first row of the source block.
  _ref:     Pointer to the first row of the reference block.
  _ystride: Offset in bytes between consecutive rows.
  Return: The accumulated SAD, read from the low 32 bits of %mm0.
  NOTE(review): the asm statement lists no clobbers for the MMX registers it
   writes and does not execute emms; presumably callers are responsible for
   the MMX/x87 state -- confirm.*/
unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
const unsigned char *_ref,int _ystride){
/*3*_ystride, computed inside the asm so that rows 3 and 7 can be addressed.*/
ptrdiff_t ystride3;
ptrdiff_t ret;
__asm__ __volatile__(
/*Load the first 4 rows of each block.*/
"movq (%[src]),%%mm0\n\t"
"movq (%[ref]),%%mm1\n\t"
"movq (%[src],%[ystride]),%%mm2\n\t"
"movq (%[ref],%[ystride]),%%mm3\n\t"
"lea (%[ystride],%[ystride],2),%[ystride3]\n\t"
"movq (%[src],%[ystride],2),%%mm4\n\t"
"movq (%[ref],%[ystride],2),%%mm5\n\t"
"movq (%[src],%[ystride3]),%%mm6\n\t"
"movq (%[ref],%[ystride3]),%%mm7\n\t"
/*Compute their SADs and add them in %%mm0*/
"psadbw %%mm1,%%mm0\n\t"
"psadbw %%mm3,%%mm2\n\t"
/*Advance both pointers by four rows.*/
"lea (%[src],%[ystride],4),%[src]\n\t"
"paddw %%mm2,%%mm0\n\t"
"lea (%[ref],%[ystride],4),%[ref]\n\t"
/*Load the next 3 rows as registers become available.*/
"movq (%[src]),%%mm2\n\t"
"movq (%[ref]),%%mm3\n\t"
"psadbw %%mm5,%%mm4\n\t"
"psadbw %%mm7,%%mm6\n\t"
"paddw %%mm4,%%mm0\n\t"
"movq (%[ref],%[ystride]),%%mm5\n\t"
"movq (%[src],%[ystride]),%%mm4\n\t"
"paddw %%mm6,%%mm0\n\t"
"movq (%[ref],%[ystride],2),%%mm7\n\t"
"movq (%[src],%[ystride],2),%%mm6\n\t"
/*Start adding their SADs to %%mm0*/
"psadbw %%mm3,%%mm2\n\t"
"psadbw %%mm5,%%mm4\n\t"
"paddw %%mm2,%%mm0\n\t"
"psadbw %%mm7,%%mm6\n\t"
/*Load last row as registers become available.*/
"movq (%[src],%[ystride3]),%%mm2\n\t"
"movq (%[ref],%[ystride3]),%%mm3\n\t"
/*And finish adding up their SADs.*/
"paddw %%mm4,%%mm0\n\t"
"psadbw %%mm3,%%mm2\n\t"
"paddw %%mm6,%%mm0\n\t"
"paddw %%mm2,%%mm0\n\t"
"movd %%mm0,%[ret]\n\t"
/*_src and _ref are read-write operands because the asm advances them;
   ystride3 is early-clobber because it is written before the last input is
   read.*/
:[ret]"=a"(ret),[src]"+%r"(_src),[ref]"+r"(_ref),[ystride3]"=&r"(ystride3)
:[ystride]"r"((ptrdiff_t)_ystride)
);
return (unsigned)ret;
}
/*Thresholded SAD entry point.
  No early termination is implemented here: _thresh is ignored and the full
   SAD is always computed by oc_enc_frag_sad_mmxext().
  _src:     Pointer to the first row of the source block.
  _ref:     Pointer to the first row of the reference block.
  _ystride: Offset in bytes between consecutive rows.
  _thresh:  Unused.
  Return: The value returned by oc_enc_frag_sad_mmxext().*/
unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _thresh){
  unsigned sad;
  /*The threshold is deliberately not consulted.*/
  (void)_thresh;
  sad=oc_enc_frag_sad_mmxext(_src,_ref,_ystride);
  return sad;
}
/*Assumes the first two rows of %[ref1] and %[ref2] are in %%mm0...%%mm3, the
   first two rows of %[src] are in %%mm4,%%mm5, and {1}x8 is in %%mm7.
  We pre-load the next two rows of data as registers become available.
  Each expansion adds the SAD of the two rows in registers to %[ret] (moved
   through %%mm6), and advances %[src], %[ref1] and %[ref2] by two rows.
  The final line of the definition deliberately carries no line continuation,
   so the macro never absorbs the line that follows it.*/
#define OC_SAD2_LOOP \
  "#OC_SAD2_LOOP\n\t" \
  /*We want to compute (%%mm0+%%mm1>>1) on unsigned bytes without overflow, but \
     pavgb computes (%%mm0+%%mm1+1>>1). \
    The latter is exactly 1 too large when the low bit of two corresponding \
     bytes is only set in one of them. \
    Therefore we pxor the operands, pand with {1}x8 to isolate the low bit of \
     each byte, and psubb to correct the output of pavgb.*/ \
  "movq %%mm0,%%mm6\n\t" \
  "lea (%[ref1],%[ystride],2),%[ref1]\n\t" \
  "pxor %%mm1,%%mm0\n\t" \
  "pavgb %%mm1,%%mm6\n\t" \
  "lea (%[ref2],%[ystride],2),%[ref2]\n\t" \
  "movq %%mm2,%%mm1\n\t" \
  "pand %%mm7,%%mm0\n\t" \
  "pavgb %%mm3,%%mm2\n\t" \
  "pxor %%mm3,%%mm1\n\t" \
  "movq (%[ref2],%[ystride]),%%mm3\n\t" \
  "psubb %%mm0,%%mm6\n\t" \
  "movq (%[ref1]),%%mm0\n\t" \
  "pand %%mm7,%%mm1\n\t" \
  "psadbw %%mm6,%%mm4\n\t" \
  "movd %[ret],%%mm6\n\t" \
  "psubb %%mm1,%%mm2\n\t" \
  "movq (%[ref2]),%%mm1\n\t" \
  "lea (%[src],%[ystride],2),%[src]\n\t" \
  "psadbw %%mm2,%%mm5\n\t" \
  "movq (%[ref1],%[ystride]),%%mm2\n\t" \
  "paddw %%mm4,%%mm5\n\t" \
  "movq (%[src]),%%mm4\n\t" \
  "paddw %%mm5,%%mm6\n\t" \
  "movq (%[src],%[ystride]),%%mm5\n\t" \
  "movd %%mm6,%[ret]\n\t"
/*Same as OC_SAD2_LOOP, but does not pre-load the next two rows and does not
   advance %[src], %[ref1] or %[ref2].
  Expects the same register contents on entry: two rows of %[ref1]/%[ref2] in
   %%mm0...%%mm3, two rows of %[src] in %%mm4,%%mm5, and {1}x8 in %%mm7.
  The final line of the definition deliberately carries no line continuation:
   a trailing backslash there would splice whatever line follows (here, the
   start of a function definition) into the macro body.*/
#define OC_SAD2_TAIL \
  "#OC_SAD2_TAIL\n\t" \
  "movq %%mm0,%%mm6\n\t" \
  "pavgb %%mm1,%%mm0\n\t" \
  "pxor %%mm1,%%mm6\n\t" \
  "movq %%mm2,%%mm1\n\t" \
  "pand %%mm7,%%mm6\n\t" \
  "pavgb %%mm3,%%mm2\n\t" \
  "pxor %%mm3,%%mm1\n\t" \
  "psubb %%mm6,%%mm0\n\t" \
  "pand %%mm7,%%mm1\n\t" \
  "psadbw %%mm0,%%mm4\n\t" \
  "psubb %%mm1,%%mm2\n\t" \
  "movd %[ret],%%mm6\n\t" \
  "psadbw %%mm2,%%mm5\n\t" \
  "paddw %%mm4,%%mm5\n\t" \
  "paddw %%mm5,%%mm6\n\t" \
  "movd %%mm6,%[ret]\n\t"
/*Computes the SAD between eight 8-byte rows of _src and the per-byte average
   (rounded down) of the corresponding rows of _ref1 and _ref2.
  Rows are processed two at a time: three OC_SAD2_LOOP expansions followed by
   one OC_SAD2_TAIL expansion.
  _src:     Pointer to the first row of the source block.
  _ref1:    Pointer to the first row of the first reference block.
  _ref2:    Pointer to the first row of the second reference block.
  _ystride: Offset in bytes between consecutive rows, applied to all three
             pointers.
  _thresh:  Unused; no early termination is performed.
  Return: The accumulated SAD.
  NOTE(review): the asm statement lists no clobbers for the MMX registers it
   writes and does not execute emms; presumably callers are responsible for
   the MMX/x87 state -- confirm.*/
unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh){
ptrdiff_t ret;
__asm__ __volatile__(
/*Pre-load the first two rows of both references and of the source, as
   OC_SAD2_LOOP expects, and zero the running total in %[ret].*/
"movq (%[ref1]),%%mm0\n\t"
"movq (%[ref2]),%%mm1\n\t"
"movq (%[ref1],%[ystride]),%%mm2\n\t"
"movq (%[ref2],%[ystride]),%%mm3\n\t"
"xor %[ret],%[ret]\n\t"
"movq (%[src]),%%mm4\n\t"
/*Build {1}x8 in %%mm7: 0-(-1) in every byte.*/
"pxor %%mm7,%%mm7\n\t"
"pcmpeqb %%mm6,%%mm6\n\t"
"movq (%[src],%[ystride]),%%mm5\n\t"
"psubb %%mm6,%%mm7\n\t"
OC_SAD2_LOOP
OC_SAD2_LOOP
OC_SAD2_LOOP
OC_SAD2_TAIL
/*ret is early-clobber because it is zeroed before the inputs are last
   read; the pointers are read-write because the macros advance them.*/
:[ret]"=&a"(ret),[src]"+r"(_src),[ref1]"+%r"(_ref1),[ref2]"+r"(_ref2)
:[ystride]"r"((ptrdiff_t)_ystride)
);
return (unsigned)ret;
}
/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their
   16-bit difference in %%mm0...%%mm7.
  Eight rows of four pixels are read (one movd per row), starting _off bytes
   into each row; row i's differences end up in %%mmi.
  _off: A string literal holding the byte offset; it is pasted into the
         assembly text, so it must be something the assembler can evaluate.
  The difference is formed without a zero register: src is interleaved with
   ref (src in the low byte, ref in the high byte of each word), ref is
   interleaved with itself, and the two are subtracted, leaving src-ref in
   each 16-bit lane.
  "_off"*2 bytes into %[buf] is used as scratch space to spill %%mm0.
  %[src] and %[ref] are advanced by eight rows and then stepped back, and
   %[src_ystride] and %[ref_ystride] are negated twice, so all four hold their
   original values on exit (they must still be writable operands).
  Spaces separate _off from the adjacent string literals so that the text is
   not lexed as a literal with a user-defined suffix by C++11 and later; the
   final line carries no line continuation so the macro never absorbs the
   line that follows it.*/
#define OC_LOAD_SUB_8x4(_off) \
  "#OC_LOAD_SUB_8x4\n\t" \
  "movd " _off "(%[src]),%%mm0\n\t" \
  "movd " _off "(%[ref]),%%mm4\n\t" \
  "movd " _off "(%[src],%[src_ystride]),%%mm1\n\t" \
  "lea (%[src],%[src_ystride],2),%[src]\n\t" \
  "movd " _off "(%[ref],%[ref_ystride]),%%mm5\n\t" \
  "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
  "movd " _off "(%[src]),%%mm2\n\t" \
  "movd " _off "(%[ref]),%%mm7\n\t" \
  "movd " _off "(%[src],%[src_ystride]),%%mm3\n\t" \
  "movd " _off "(%[ref],%[ref_ystride]),%%mm6\n\t" \
  "punpcklbw %%mm4,%%mm0\n\t" \
  "lea (%[src],%[src_ystride],2),%[src]\n\t" \
  "punpcklbw %%mm4,%%mm4\n\t" \
  "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
  "psubw %%mm4,%%mm0\n\t" \
  "movd " _off "(%[src]),%%mm4\n\t" \
  "movq %%mm0," _off "*2(%[buf])\n\t" \
  "movd " _off "(%[ref]),%%mm0\n\t" \
  "punpcklbw %%mm5,%%mm1\n\t" \
  "punpcklbw %%mm5,%%mm5\n\t" \
  "psubw %%mm5,%%mm1\n\t" \
  "movd " _off "(%[src],%[src_ystride]),%%mm5\n\t" \
  "punpcklbw %%mm7,%%mm2\n\t" \
  "punpcklbw %%mm7,%%mm7\n\t" \
  "psubw %%mm7,%%mm2\n\t" \
  "movd " _off "(%[ref],%[ref_ystride]),%%mm7\n\t" \
  "punpcklbw %%mm6,%%mm3\n\t" \
  "lea (%[src],%[src_ystride],2),%[src]\n\t" \
  "punpcklbw %%mm6,%%mm6\n\t" \
  "psubw %%mm6,%%mm3\n\t" \
  "movd " _off "(%[src]),%%mm6\n\t" \
  "punpcklbw %%mm0,%%mm4\n\t" \
  "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
  "punpcklbw %%mm0,%%mm0\n\t" \
  "lea (%[src],%[src_ystride],2),%[src]\n\t" \
  "psubw %%mm0,%%mm4\n\t" \
  "movd " _off "(%[ref]),%%mm0\n\t" \
  "punpcklbw %%mm7,%%mm5\n\t" \
  "neg %[src_ystride]\n\t" \
  "punpcklbw %%mm7,%%mm7\n\t" \
  "psubw %%mm7,%%mm5\n\t" \
  "movd " _off "(%[src],%[src_ystride]),%%mm7\n\t" \
  "punpcklbw %%mm0,%%mm6\n\t" \
  "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \
  "punpcklbw %%mm0,%%mm0\n\t" \
  "neg %[ref_ystride]\n\t" \
  "psubw %%mm0,%%mm6\n\t" \
  "movd " _off "(%[ref],%[ref_ystride]),%%mm0\n\t" \
  "lea (%[src],%[src_ystride],8),%[src]\n\t" \
  "punpcklbw %%mm0,%%mm7\n\t" \
  "neg %[src_ystride]\n\t" \
  "punpcklbw %%mm0,%%mm0\n\t" \
  "lea (%[ref],%[ref_ystride],8),%[ref]\n\t" \
  "psubw %%mm0,%%mm7\n\t" \
  "neg %[ref_ystride]\n\t" \
  "movq " _off "*2(%[buf]),%%mm0\n\t"
/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.
  Eight rows of four pixels are read (one movd per row), starting _off bytes
   into each row, and each pixel is widened to an unsigned 16-bit value.
  Rows 0...3 are addressed from %[src] and rows 4...7 from %[src4], using
   %[ystride] and %[ystride3] as row offsets.
  _off: A string literal holding the byte offset; it is pasted into the
         assembly text.
  Rows 0...3 are widened by unpacking against a zeroed %%mm7.
  Once %%mm7 has been loaded with row 7 it is no longer zero, so rows 4...7 are
   widened by unpacking each register with itself and shifting each word right
   by 8.
  Spaces separate _off from the adjacent string literals so that the text is
   not lexed as a literal with a user-defined suffix by C++11 and later; the
   final line carries no line continuation so the macro never absorbs the
   line that follows it.*/
#define OC_LOAD_8x4(_off) \
  "#OC_LOAD_8x4\n\t" \
  "movd " _off "(%[src]),%%mm0\n\t" \
  "movd " _off "(%[src],%[ystride]),%%mm1\n\t" \
  "movd " _off "(%[src],%[ystride],2),%%mm2\n\t" \
  "pxor %%mm7,%%mm7\n\t" \
  "movd " _off "(%[src],%[ystride3]),%%mm3\n\t" \
  "punpcklbw %%mm7,%%mm0\n\t" \
  "movd " _off "(%[src4]),%%mm4\n\t" \
  "punpcklbw %%mm7,%%mm1\n\t" \
  "movd " _off "(%[src4],%[ystride]),%%mm5\n\t" \
  "punpcklbw %%mm7,%%mm2\n\t" \
  "movd " _off "(%[src4],%[ystride],2),%%mm6\n\t" \
  "punpcklbw %%mm7,%%mm3\n\t" \
  "movd " _off "(%[src4],%[ystride3]),%%mm7\n\t" \
  "punpcklbw %%mm4,%%mm4\n\t" \
  "punpcklbw %%mm5,%%mm5\n\t" \
  "psrlw $8,%%mm4\n\t" \
  "psrlw $8,%%mm5\n\t" \
  "punpcklbw %%mm6,%%mm6\n\t" \
  "punpcklbw %%mm7,%%mm7\n\t" \
  "psrlw $8,%%mm6\n\t" \
  "psrlw $8,%%mm7\n\t"
/*Performs the first two stages of an 8-point 1-D Hadamard transform.
  The transform is performed in place, except that outputs 0-3 are swapped with
   outputs 4-7.
  Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to
   perform this stage in place with no temporary registers).
  Each butterfly is computed as b+=a; a+=a; a-=b, which leaves the sum in b and
   the difference in a using only the two registers involved.
  The final line of the definition carries no line continuation, so the macro
   never absorbs the line that follows it.*/
#define OC_HADAMARD_AB_8x4 \
  "#OC_HADAMARD_AB_8x4\n\t" \
  /*Stage A: \
    Outputs 0-3 are swapped with 4-7 here.*/ \
  "paddw %%mm1,%%mm5\n\t" \
  "paddw %%mm2,%%mm6\n\t" \
  "paddw %%mm1,%%mm1\n\t" \
  "paddw %%mm2,%%mm2\n\t" \
  "psubw %%mm5,%%mm1\n\t" \
  "psubw %%mm6,%%mm2\n\t" \
  "paddw %%mm3,%%mm7\n\t" \
  "paddw %%mm0,%%mm4\n\t" \
  "paddw %%mm3,%%mm3\n\t" \
  "paddw %%mm0,%%mm0\n\t" \
  "psubw %%mm7,%%mm3\n\t" \
  "psubw %%mm4,%%mm0\n\t" \
  /*Stage B:*/ \
  "paddw %%mm2,%%mm0\n\t" \
  "paddw %%mm3,%%mm1\n\t" \
  "paddw %%mm6,%%mm4\n\t" \
  "paddw %%mm7,%%mm5\n\t" \
  "paddw %%mm2,%%mm2\n\t" \
  "paddw %%mm3,%%mm3\n\t" \
  "paddw %%mm6,%%mm6\n\t" \
  "paddw %%mm7,%%mm7\n\t" \
  "psubw %%mm0,%%mm2\n\t" \
  "psubw %%mm1,%%mm3\n\t" \
  "psubw %%mm4,%%mm6\n\t" \
  "psubw %%mm5,%%mm7\n\t"
/*Performs the last stage of an 8-point 1-D Hadamard transform in place.
  Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in
   place with no temporary registers).
  Each butterfly is computed as b+=a; a+=a; a-=b on adjacent register pairs
   (%%mm0/%%mm1, %%mm2/%%mm3, %%mm4/%%mm5, %%mm6/%%mm7).
  The final line of the definition carries no line continuation, so the macro
   never absorbs the line that follows it.*/
#define OC_HADAMARD_C_8x4 \
  "#OC_HADAMARD_C_8x4\n\t" \
  /*Stage C:*/ \
  "paddw %%mm1,%%mm0\n\t" \
  "paddw %%mm3,%%mm2\n\t" \
  "paddw %%mm5,%%mm4\n\t" \
  "paddw %%mm7,%%mm6\n\t" \
  "paddw %%mm1,%%mm1\n\t" \
  "paddw %%mm3,%%mm3\n\t" \
  "paddw %%mm5,%%mm5\n\t" \
  "paddw %%mm7,%%mm7\n\t" \
  "psubw %%mm0,%%mm1\n\t" \
  "psubw %%mm2,%%mm3\n\t" \
  "psubw %%mm4,%%mm5\n\t" \
  "psubw %%mm6,%%mm7\n\t"
/*Performs an 8-point 1-D Hadamard transform.
  The transform is performed in place, except that outputs 0-3 are swapped with
   outputs 4-7.
  Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform
   in place with no temporary registers).
  This is simply OC_HADAMARD_AB_8x4 followed by OC_HADAMARD_C_8x4.
  The final line of the definition carries no line continuation, so the macro
   never absorbs the line that follows it.*/
#define OC_HADAMARD_8x4 \
  OC_HADAMARD_AB_8x4 \
  OC_HADAMARD_C_8x4
/*Performs the first part of the final stage of the Hadamard transform and
   summing of absolute values.
  At the end of this part, %%mm1 will contain the DC coefficient of the
   transform.
  _r6: A string literal holding the byte offset in %[buf] at which %%mm6 is
        spilled.
  _r7: A string literal holding the byte offset in %[buf] at which %%mm7 is
        spilled; it is reloaded into %%mm3 on the last line.
  On exit %%mm7 holds {0x7FFF}x4, which OC_HADAMARD_C_ABS_ACCUM_B_8x4 relies
   on.
  Spaces separate _r6/_r7 from the adjacent string literals so that the text
   is not lexed as a literal with a user-defined suffix by C++11 and later;
   the final line carries no line continuation so the macro never absorbs the
   line that follows it.*/
#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \
  /*We use the fact that \
      (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \
     to merge the final butterfly with the abs and the first stage of \
     accumulation. \
    Thus we can avoid using pabsw, which is not available until SSSE3. \
    Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \
     implementation would be (3+3)*8+7=55 instructions (+4 for spilling \
     registers). \
    Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). \
    This implementation is only 26 (+4 for spilling registers).*/ \
  "#OC_HADAMARD_C_ABS_ACCUM_A_8x4\n\t" \
  "movq %%mm7," _r7 "(%[buf])\n\t" \
  "movq %%mm6," _r6 "(%[buf])\n\t" \
  /*mm7={0x7FFF}x4 \
    mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \
  "pcmpeqb %%mm7,%%mm7\n\t" \
  "movq %%mm0,%%mm6\n\t" \
  "psrlw $1,%%mm7\n\t" \
  "paddw %%mm1,%%mm6\n\t" \
  "pmaxsw %%mm1,%%mm0\n\t" \
  "paddsw %%mm7,%%mm6\n\t" \
  "psubw %%mm6,%%mm0\n\t" \
  /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \
    mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \
  "movq %%mm2,%%mm6\n\t" \
  "movq %%mm4,%%mm1\n\t" \
  "pmaxsw %%mm3,%%mm2\n\t" \
  "pmaxsw %%mm5,%%mm4\n\t" \
  "paddw %%mm3,%%mm6\n\t" \
  "paddw %%mm5,%%mm1\n\t" \
  "movq " _r7 "(%[buf]),%%mm3\n\t"
/*Performs the second part of the final stage of the Hadamard transform and
summing of absolute values.*/
#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \
"#OC_HADAMARD_C_ABS_ACCUM_B_8x4\n\t" \
"paddsw %%mm7,%%mm6\n\t" \
"movq "_r6"(%[buf]),%%mm5\n\t" \
"paddsw %%mm7,%%mm1\n\t" \
"psubw %%mm6,%%mm2\n\t" \
"psubw %%mm1,%%mm4\n\t" \
/*mm7={1}x4 (needed for the horizontal add that follows) \
mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \
"movq %%mm3,%%mm6\n\t" \
"pmaxsw %%mm5,%%mm3\n\t" \
"paddw %%mm2,%%mm0\n\t" \
"paddw %%mm5,%%mm6\n\t" \
"paddw %%mm4,%%mm0\n\t" \
"paddsw %%mm7,%%mm6\n\t" \
"paddw %%mm3,%%mm0\n\t" \
"psrlw $14,%%mm7\n\t" \
"psubw %%mm6,%%mm0\n\t" \
/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the
absolute value of each component, and accumulates everything into mm0.
This is the only portion of SATD which requires MMXEXT (we could use plain
MMX, but it takes 4 instructions and an extra register to work around the
lack of a pmaxsw, which is a pretty serious penalty).
It is the A part immediately followed by the B part; callers that need the
DC coefficient from mm1 invoke the two parts separately instead.
_r6 and _r7 are the byte offsets (string literals) of the two scratch slots
in %[buf] used for spilling.*/
#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \
OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \
OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \
/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each
component, and accumulates everything into mm0.
Note that mm0 will have an extra 4 added to each column, and that after
removing this value, the remainder will be half the conventional value.
_r6 and _r7 are the byte offsets (string literals) of the two scratch slots
in %[buf] that the final stage uses to spill mm6 and mm7.*/
#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) \
OC_HADAMARD_AB_8x4 \
OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7)
/*Performs two 4x4 transposes (mostly) in place.
On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7}
contains rows {a,b,c,d}.
On output, {0x40,0x50,0x60,0x70}+_off(%[buf]) contains {e,f,g,h}^T, and
{mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/
#define OC_TRANSPOSE_4x4x2(_off) \
"#OC_TRANSPOSE_4x4x2\n\t" \
/*First 4x4 transpose:*/ \
"movq %%mm5,0x10+"_off"(%[buf])\n\t" \
/*mm0 = e3 e2 e1 e0 \
mm1 = f3 f2 f1 f0 \
mm2 = g3 g2 g1 g0 \
mm3 = h3 h2 h1 h0*/ \
"movq %%mm2,%%mm5\n\t" \
"punpcklwd %%mm3,%%mm2\n\t" \
"punpckhwd %%mm3,%%mm5\n\t" \
"movq %%mm0,%%mm3\n\t" \
"punpcklwd %%mm1,%%mm0\n\t" \
"punpckhwd %%mm1,%%mm3\n\t" \
/*mm0 = f1 e1 f0 e0 \
mm3 = f3 e3 f2 e2 \
mm2 = h1 g1 h0 g0 \
mm5 = h3 g3 h2 g2*/ \
"movq %%mm0,%%mm1\n\t" \
"punpckldq %%mm2,%%mm0\n\t" \
"punpckhdq %%mm2,%%mm1\n\t" \
"movq %%mm3,%%mm2\n\t" \
"punpckhdq %%mm5,%%mm3\n\t" \
"movq %%mm0,0x40+"_off"(%[buf])\n\t" \
"punpckldq %%mm5,%%mm2\n\t" \
/*mm0 = h0 g0 f0 e0 \
mm1 = h1 g1 f1 e1 \
mm2 = h2 g2 f2 e2 \
mm3 = h3 g3 f3 e3*/ \
"movq 0x10+"_off"(%[buf]),%%mm5\n\t" \
/*Second 4x4 transpose:*/ \
/*mm4 = a3 a2 a1 a0 \
mm5 = b3 b2 b1 b0 \
mm6 = c3 c2 c1 c0 \
mm7 = d3 d2 d1 d0*/ \
"movq %%mm6,%%mm0\n\t" \
"punpcklwd %%mm7,%%mm6\n\t" \
"movq %%mm1,0x50+"_off"(%[buf])\n\t" \
"punpckhwd %%mm7,%%mm0\n\t" \
"movq %%mm4,%%mm7\n\t" \
"punpcklwd %%mm5,%%mm4\n\t" \
"movq %%mm2,0x60+"_off"(%[buf])\n\t" \
"punpckhwd %%mm5,%%mm7\n\t" \
/*mm4 = b1 a1 b0 a0 \
mm7 = b3 a3 b2 a2 \
mm6 = d1 c1 d0 c0 \
mm0 = d3 c3 d2 c2*/ \
"movq %%mm4,%%mm5\n\t" \
"punpckldq %%mm6,%%mm4\n\t" \
"movq %%mm3,0x70+"_off"(%[buf])\n\t" \
"punpckhdq %%mm6,%%mm5\n\t" \
"movq %%mm7,%%mm6\n\t" \
"punpckhdq %%mm0,%%mm7\n\t" \
"punpckldq %%mm0,%%mm6\n\t" \
/*mm4 = d0 c0 b0 a0 \
mm5 = d1 c1 b1 a1 \
mm6 = d2 c2 b2 a2 \
mm7 = d3 c3 b3 a3*/ \
/*Computes the SATD (a sum of absolute Hadamard-transformed differences)
between a block of _src and a block of _ref, with an early exit.
The input is consumed as two 8x4 halves through OC_LOAD_SUB_8x4 (defined
outside this view; presumably it loads both inputs and subtracts them), each
followed by a 1-D Hadamard transform and a transpose, with buf serving as
16-bit scratch space.
The second transform is then accumulated in two passes.
After the first pass the corrected partial sum is compared (unsigned) against
_thresh; if it is at least _thresh, the second pass is skipped and the
partial sum is returned.
_src, _src_ystride: The first input block and its row stride.
_ref, _ref_ystride: The second input block and its row stride.
_thresh: The early-termination threshold.
Return: The full sum, or the partial sum if that already reached _thresh.*/
static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
OC_ALIGN8(ogg_int16_t buf[64]);
ogg_int16_t *bufp;
unsigned ret;
unsigned ret2;
bufp=buf;
__asm__ __volatile__(
OC_LOAD_SUB_8x4("0x00")
OC_HADAMARD_8x4
OC_TRANSPOSE_4x4x2("0x00")
/*Finish swapping out this 8x4 block to make room for the next one.
mm0...mm3 have been swapped out already.*/
"movq %%mm4,0x00(%[buf])\n\t"
"movq %%mm5,0x10(%[buf])\n\t"
"movq %%mm6,0x20(%[buf])\n\t"
"movq %%mm7,0x30(%[buf])\n\t"
OC_LOAD_SUB_8x4("0x04")
OC_HADAMARD_8x4
OC_TRANSPOSE_4x4x2("0x08")
/*Here the first 4x4 block of output from the last transpose is the second
4x4 block of input for the next transform.
We have cleverly arranged that it already be in the appropriate place, so
we only have to do half the loads.*/
"movq 0x10(%[buf]),%%mm1\n\t"
"movq 0x20(%[buf]),%%mm2\n\t"
"movq 0x30(%[buf]),%%mm3\n\t"
"movq 0x00(%[buf]),%%mm0\n\t"
OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38")
/*Up to this point, everything fit in 16 bits (8 input + 1 for the
difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
for the factor of two we dropped + 3 for the vertical accumulation).
Now we finally have to promote things to dwords.
We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
latency of pmaddwd by starting the next series of loads now.*/
/*%[ret2] holds the threshold until the comparison below.*/
"mov %[thresh],%[ret2]\n\t"
/*mm7={1}x4 here, so pmaddwd adds adjacent words into dwords.*/
"pmaddwd %%mm7,%%mm0\n\t"
"movq 0x50(%[buf]),%%mm1\n\t"
"movq 0x58(%[buf]),%%mm5\n\t"
"movq %%mm0,%%mm4\n\t"
"movq 0x60(%[buf]),%%mm2\n\t"
"punpckhdq %%mm0,%%mm0\n\t"
"movq 0x68(%[buf]),%%mm6\n\t"
"paddd %%mm0,%%mm4\n\t"
"movq 0x70(%[buf]),%%mm3\n\t"
"movd %%mm4,%[ret]\n\t"
"movq 0x78(%[buf]),%%mm7\n\t"
/*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
added to them, and a factor of two removed; correct the final sum here.*/
/*ret=2*ret-32*/
"lea -32(%[ret],%[ret]),%[ret]\n\t"
"movq 0x40(%[buf]),%%mm0\n\t"
"cmp %[ret2],%[ret]\n\t"
"movq 0x48(%[buf]),%%mm4\n\t"
/*Early exit: skip the second pass if the partial sum is already at least
the threshold (unsigned comparison).*/
"jae 1f\n\t"
OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
"pmaddwd %%mm7,%%mm0\n\t"
/*There isn't much to stick in here to hide the latency this time, but the
alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
latency is even worse.*/
"sub $32,%[ret]\n\t"
"movq %%mm0,%%mm4\n\t"
"punpckhdq %%mm0,%%mm0\n\t"
"paddd %%mm0,%%mm4\n\t"
"movd %%mm4,%[ret2]\n\t"
/*ret+=2*ret2 (the -32 correction for this pass was applied above).*/
"lea (%[ret],%[ret2],2),%[ret]\n\t"
".p2align 4,,15\n\t"
"1:\n\t"
/*Although it looks like we're using 7 registers here, gcc can alias %[ret]
and %[ret2] with some of the inputs, since for once we don't write to
them until after we're done using everything but %[buf] (which is also
listed as an output to ensure gcc _doesn't_ alias them against it).*/
/*Note that _src_ystride and _ref_ystride must be given non-overlapping
constraints, otherwise if gcc can prove they're equal it will allocate
them to the same register (which is bad); _src and _ref face a similar
problem, though those are never actually the same.*/
:[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
:[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride),
[ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride),
[thresh]"m"(_thresh)
/*We have to use neg, so we actually clobber the condition codes for once
(not to mention cmp, sub, and add).*/
:"cc"
);
return ret;
}
/*Public entry point for the thresholded SATD where the source and the
  reference share a single row stride.
  _src:     The source block.
  _ref:     The reference block.
  _ystride: The row stride used for both _src and _ref.
  _thresh:  The early-termination threshold, passed through unchanged.
  Return: Whatever oc_int_frag_satd_thresh_mmxext() returns for these inputs.*/
unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _thresh){
  unsigned satd;
  /*The internal routine takes one stride per input; use the same for both.*/
  satd=oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
  return satd;
}
/*Our internal implementation of frag_copy2 takes an extra stride parameter so
we can share code with oc_enc_frag_satd2_thresh_mmxext().
Writes to _dst 8 rows of 8 bytes, each byte being the average of the
corresponding bytes of _src1 and _src2.
Every row is computed as pavgb(a,b)-((a^b)&1): pavgb rounds the average up,
and subtracting the low bit of a^b turns that into a round-down average.
Loads, averaging and stores of neighboring rows are interleaved.
_dst, _dst_ystride: The destination block and its row stride.
_src1, _src2: The two source blocks.
_src_ystride: The row stride used for both _src1 and _src2.*/
static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
__asm__ __volatile__(
/*Load the first 3 rows.*/
"movq (%[src1]),%%mm0\n\t"
"movq (%[src2]),%%mm1\n\t"
"movq (%[src1],%[src_ystride]),%%mm2\n\t"
"lea (%[src1],%[src_ystride],2),%[src1]\n\t"
"movq (%[src2],%[src_ystride]),%%mm3\n\t"
"lea (%[src2],%[src_ystride],2),%[src2]\n\t"
"pxor %%mm7,%%mm7\n\t"
"movq (%[src1]),%%mm4\n\t"
"pcmpeqb %%mm6,%%mm6\n\t"
"movq (%[src2]),%%mm5\n\t"
/*mm7={1}x8.*/
"psubb %%mm6,%%mm7\n\t"
/*Start averaging %%mm0 and %%mm1 into %%mm6.*/
"movq %%mm0,%%mm6\n\t"
"pxor %%mm1,%%mm0\n\t"
"pavgb %%mm1,%%mm6\n\t"
/*%%mm1 is free, start averaging %%mm3 into %%mm2 using %%mm1.*/
"movq %%mm2,%%mm1\n\t"
"pand %%mm7,%%mm0\n\t"
"pavgb %%mm3,%%mm2\n\t"
"pxor %%mm3,%%mm1\n\t"
/*%%mm3 is free.*/
"psubb %%mm0,%%mm6\n\t"
/*%%mm0 is free, start loading the next row.*/
"movq (%[src1],%[src_ystride]),%%mm0\n\t"
/*Start averaging %%mm5 and %%mm4 using %%mm3.*/
"movq %%mm4,%%mm3\n\t"
/*%%mm6 (row 0) is done; write it out.*/
"movq %%mm6,(%[dst])\n\t"
"pand %%mm7,%%mm1\n\t"
"pavgb %%mm5,%%mm4\n\t"
"psubb %%mm1,%%mm2\n\t"
/*%%mm1 is free, continue loading the next row.*/
"movq (%[src2],%[src_ystride]),%%mm1\n\t"
"pxor %%mm5,%%mm3\n\t"
"lea (%[src1],%[src_ystride],2),%[src1]\n\t"
/*%%mm2 (row 1) is done; write it out.*/
"movq %%mm2,(%[dst],%[dst_ystride])\n\t"
"pand %%mm7,%%mm3\n\t"
/*Start loading the next row.*/
"movq (%[src1]),%%mm2\n\t"
"lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
"psubb %%mm3,%%mm4\n\t"
"lea (%[src2],%[src_ystride],2),%[src2]\n\t"
/*%%mm4 (row 2) is done; write it out.*/
"movq %%mm4,(%[dst])\n\t"
/*Continue loading the next row.*/
"movq (%[src2]),%%mm3\n\t"
/*Start averaging %%mm0 and %%mm1 into %%mm6.*/
"movq %%mm0,%%mm6\n\t"
"pxor %%mm1,%%mm0\n\t"
/*Start loading the next row.*/
"movq (%[src1],%[src_ystride]),%%mm4\n\t"
"pavgb %%mm1,%%mm6\n\t"
/*%%mm1 is free; start averaging %%mm3 into %%mm2 using %%mm1.*/
"movq %%mm2,%%mm1\n\t"
"pand %%mm7,%%mm0\n\t"
/*Continue loading the next row.*/
"movq (%[src2],%[src_ystride]),%%mm5\n\t"
"pavgb %%mm3,%%mm2\n\t"
"lea (%[src1],%[src_ystride],2),%[src1]\n\t"
"pxor %%mm3,%%mm1\n\t"
/*%%mm3 is free.*/
"psubb %%mm0,%%mm6\n\t"
/*%%mm0 is free, start loading the next row.*/
"movq (%[src1]),%%mm0\n\t"
/*Start averaging %%mm5 into %%mm4 using %%mm3.*/
"movq %%mm4,%%mm3\n\t"
/*%%mm6 (row 3) is done; write it out.*/
"movq %%mm6,(%[dst],%[dst_ystride])\n\t"
"pand %%mm7,%%mm1\n\t"
"lea (%[src2],%[src_ystride],2),%[src2]\n\t"
"pavgb %%mm5,%%mm4\n\t"
"lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
"psubb %%mm1,%%mm2\n\t"
/*%%mm1 is free; continue loading the next row.*/
"movq (%[src2]),%%mm1\n\t"
"pxor %%mm5,%%mm3\n\t"
/*%%mm2 (row 4) is done; write it out.*/
"movq %%mm2,(%[dst])\n\t"
"pand %%mm7,%%mm3\n\t"
/*Start loading the next row.*/
"movq (%[src1],%[src_ystride]),%%mm2\n\t"
"psubb %%mm3,%%mm4\n\t"
/*Start averaging %%mm0 and %%mm1 into %%mm6.*/
"movq %%mm0,%%mm6\n\t"
/*Continue loading the next row.*/
"movq (%[src2],%[src_ystride]),%%mm3\n\t"
/*%%mm4 (row 5) is done; write it out.*/
"movq %%mm4,(%[dst],%[dst_ystride])\n\t"
"pxor %%mm1,%%mm0\n\t"
"pavgb %%mm1,%%mm6\n\t"
/*%%mm4 is free; start averaging %%mm3 into %%mm2 using %%mm4.*/
"movq %%mm2,%%mm4\n\t"
"pand %%mm7,%%mm0\n\t"
"pavgb %%mm3,%%mm2\n\t"
"pxor %%mm3,%%mm4\n\t"
"lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
"psubb %%mm0,%%mm6\n\t"
"pand %%mm7,%%mm4\n\t"
/*%%mm6 (row 6) is done, write it out.*/
"movq %%mm6,(%[dst])\n\t"
"psubb %%mm4,%%mm2\n\t"
/*%%mm2 (row 7) is done, write it out.*/
"movq %%mm2,(%[dst],%[dst_ystride])\n\t"
/*All three pointers are advanced inside the asm, hence the "+" constraints;
the "memory" clobber covers the stores through %[dst].*/
:[dst]"+r"(_dst),[src1]"+%r"(_src1),[src2]"+r"(_src2)
:[dst_ystride]"r"((ptrdiff_t)_dst_ystride),
[src_ystride]"r"((ptrdiff_t)_src_ystride)
:"memory"
);
}
unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh){
OC_ALIGN8(unsigned char ref[64]);
oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride);
return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh);
}
/*Computes the SATD of a block of _src on its own (there is no reference
block), with the DC coefficient subtracted out.
The structure mirrors oc_int_frag_satd_thresh_mmxext(), except that the rows
are loaded with OC_LOAD_8x4 (defined outside this view), there is no early
exit, and the final stage of the first accumulation pass is split in two so
the DC coefficient can be captured from mm1 in the middle.
_src: The input block.
_ystride: The row stride of _src.
Return: 2*(sum of both accumulation passes)-64-DC.*/
unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
int _ystride){
OC_ALIGN8(ogg_int16_t buf[64]);
ogg_int16_t *bufp;
unsigned ret;
unsigned ret2;
bufp=buf;
__asm__ __volatile__(
OC_LOAD_8x4("0x00")
OC_HADAMARD_8x4
OC_TRANSPOSE_4x4x2("0x00")
/*Finish swapping out this 8x4 block to make room for the next one.
mm0...mm3 have been swapped out already.*/
"movq %%mm4,0x00(%[buf])\n\t"
"movq %%mm5,0x10(%[buf])\n\t"
"movq %%mm6,0x20(%[buf])\n\t"
"movq %%mm7,0x30(%[buf])\n\t"
OC_LOAD_8x4("0x04")
OC_HADAMARD_8x4
OC_TRANSPOSE_4x4x2("0x08")
/*Here the first 4x4 block of output from the last transpose is the second
4x4 block of input for the next transform.
We have cleverly arranged that it already be in the appropriate place, so
we only have to do half the loads.*/
"movq 0x10(%[buf]),%%mm1\n\t"
"movq 0x20(%[buf]),%%mm2\n\t"
"movq 0x30(%[buf]),%%mm3\n\t"
"movq 0x00(%[buf]),%%mm0\n\t"
/*We split out the stages here so we can save the DC coefficient in the
middle.*/
OC_HADAMARD_AB_8x4
OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38")
/*The low word of mm1 is the DC coefficient; the upper half of %[ret] is
cleared by the movzx further down.*/
"movd %%mm1,%[ret]\n\t"
OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38")
/*Up to this point, everything fit in 16 bits (8 input + 1 for the
difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
for the factor of two we dropped + 3 for the vertical accumulation).
Now we finally have to promote things to dwords.
We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
latency of pmaddwd by starting the next series of loads now.*/
"pmaddwd %%mm7,%%mm0\n\t"
"movq 0x50(%[buf]),%%mm1\n\t"
"movq 0x58(%[buf]),%%mm5\n\t"
"movq 0x60(%[buf]),%%mm2\n\t"
"movq %%mm0,%%mm4\n\t"
"movq 0x68(%[buf]),%%mm6\n\t"
"punpckhdq %%mm0,%%mm0\n\t"
"movq 0x70(%[buf]),%%mm3\n\t"
"paddd %%mm0,%%mm4\n\t"
"movq 0x78(%[buf]),%%mm7\n\t"
/*%[ret2] receives the sum of the first accumulation pass.*/
"movd %%mm4,%[ret2]\n\t"
"movq 0x40(%[buf]),%%mm0\n\t"
"movq 0x48(%[buf]),%%mm4\n\t"
OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
"pmaddwd %%mm7,%%mm0\n\t"
/*We assume that the DC coefficient is always positive (which is true,
because the input to the INTRA transform was not a difference).*/
"movzx %w[ret],%[ret]\n\t"
/*ret2=2*ret2-DC*/
"add %[ret2],%[ret2]\n\t"
"sub %[ret],%[ret2]\n\t"
"movq %%mm0,%%mm4\n\t"
"punpckhdq %%mm0,%%mm0\n\t"
"paddd %%mm0,%%mm4\n\t"
/*%[ret] receives the sum of the second accumulation pass.*/
"movd %%mm4,%[ret]\n\t"
/*ret=ret2+2*ret-64; the -64 is the same correction that
oc_int_frag_satd_thresh_mmxext() applies as -32 per pass.*/
"lea -64(%[ret2],%[ret],2),%[ret]\n\t"
/*Although it looks like we're using 7 registers here, gcc can alias %[ret]
and %[ret2] with some of the inputs, since for once we don't write to
them until after we're done using everything but %[buf] (which is also
listed as an output to ensure gcc _doesn't_ alias them against it).*/
:[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
:[src]"r"(_src),[src4]"r"(_src+4*_ystride),
[ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride)
/*We have to use sub, so we actually clobber the condition codes for once
(not to mention add).*/
:"cc"
);
return ret;
}
/*Computes the 16-bit difference _src-_ref for 8 rows of 8 pixels.
_residue: Receives the 64 differences, row by row.
_src: The source block.
_ref: The reference block.
_ystride: The row stride used for both _src and _ref.
Each of the 4 loop iterations handles two rows: the bytes are zero-extended
to words by unpacking against mm7 (zero) and then subtracted.
NOTE(review): mm7 is cleared by a separate asm statement before the loop, and
the asm inside the loop relies on it still being zero; this dependency is not
expressed through any operand or clobber.*/
void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64],
const unsigned char *_src,const unsigned char *_ref,int _ystride){
int i;
/*mm7=0, used below to zero-extend bytes to words.*/
__asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::);
for(i=4;i-->0;){
__asm__ __volatile__(
/*mm0=[src]*/
"movq (%[src]),%%mm0\n\t"
/*mm1=[ref]*/
"movq (%[ref]),%%mm1\n\t"
/*mm4=[src+ystride]*/
"movq (%[src],%[ystride]),%%mm4\n\t"
/*mm5=[ref+ystride]*/
"movq (%[ref],%[ystride]),%%mm5\n\t"
/*Compute [src]-[ref].*/
"movq %%mm0,%%mm2\n\t"
"punpcklbw %%mm7,%%mm0\n\t"
"movq %%mm1,%%mm3\n\t"
"punpckhbw %%mm7,%%mm2\n\t"
"punpcklbw %%mm7,%%mm1\n\t"
"punpckhbw %%mm7,%%mm3\n\t"
"psubw %%mm1,%%mm0\n\t"
"psubw %%mm3,%%mm2\n\t"
/*Compute [src+ystride]-[ref+ystride].*/
"movq %%mm4,%%mm1\n\t"
"punpcklbw %%mm7,%%mm4\n\t"
"movq %%mm5,%%mm3\n\t"
"punpckhbw %%mm7,%%mm1\n\t"
/*Advance src and ref by two rows for the next iteration.*/
"lea (%[src],%[ystride],2),%[src]\n\t"
"punpcklbw %%mm7,%%mm5\n\t"
"lea (%[ref],%[ystride],2),%[ref]\n\t"
"punpckhbw %%mm7,%%mm3\n\t"
"psubw %%mm5,%%mm4\n\t"
"psubw %%mm3,%%mm1\n\t"
/*Write the answer out.*/
"movq %%mm0,0x00(%[residue])\n\t"
"movq %%mm2,0x08(%[residue])\n\t"
"movq %%mm4,0x10(%[residue])\n\t"
"movq %%mm1,0x18(%[residue])\n\t"
/*Advance residue by the two rows (32 bytes) just written.*/
"lea 0x20(%[residue]),%[residue]\n\t"
:[residue]"+r"(_residue),[src]"+r"(_src),[ref]"+r"(_ref)
:[ystride]"r"((ptrdiff_t)_ystride)
:"memory"
);
}
}
/*Computes the 16-bit difference _src-128 for 8 rows of 8 pixels.
_residue: Receives the 64 differences, row by row.
_src: The source block.
_ystride: The row stride of _src.
The 8 rows are handled fully unrolled, two at a time, with the loads for the
next pair of rows interleaved with the arithmetic for the current pair.
ystride3 is a scratch output that the asm sets to 3*_ystride.*/
void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64],
const unsigned char *_src,int _ystride){
ptrdiff_t ystride3;
__asm__ __volatile__(
/*mm0=[src]*/
"movq (%[src]),%%mm0\n\t"
/*mm1=[src+ystride]*/
"movq (%[src],%[ystride]),%%mm1\n\t"
/*mm6={-1}x4*/
"pcmpeqw %%mm6,%%mm6\n\t"
/*mm2=[src+2*ystride]*/
"movq (%[src],%[ystride],2),%%mm2\n\t"
/*[ystride3]=3*[ystride]*/
"lea (%[ystride],%[ystride],2),%[ystride3]\n\t"
/*mm6={0x8000}x4*/
"psllw $15,%%mm6\n\t"
/*mm3=[src+3*ystride]*/
"movq (%[src],%[ystride3]),%%mm3\n\t"
/*mm6={128}x4*/
"psrlw $8,%%mm6\n\t"
/*mm7=0*/
"pxor %%mm7,%%mm7\n\t"
/*[src]=[src]+4*[ystride]*/
"lea (%[src],%[ystride],4),%[src]\n\t"
/*Compute [src]-128 and [src+ystride]-128*/
"movq %%mm0,%%mm4\n\t"
"punpcklbw %%mm7,%%mm0\n\t"
"movq %%mm1,%%mm5\n\t"
"punpckhbw %%mm7,%%mm4\n\t"
"psubw %%mm6,%%mm0\n\t"
"punpcklbw %%mm7,%%mm1\n\t"
"psubw %%mm6,%%mm4\n\t"
"punpckhbw %%mm7,%%mm5\n\t"
"psubw %%mm6,%%mm1\n\t"
"psubw %%mm6,%%mm5\n\t"
/*Write the answer out.*/
"movq %%mm0,0x00(%[residue])\n\t"
"movq %%mm4,0x08(%[residue])\n\t"
"movq %%mm1,0x10(%[residue])\n\t"
"movq %%mm5,0x18(%[residue])\n\t"
/*mm0=[src+4*ystride]*/
"movq (%[src]),%%mm0\n\t"
/*mm1=[src+5*ystride]*/
"movq (%[src],%[ystride]),%%mm1\n\t"
/*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/
"movq %%mm2,%%mm4\n\t"
"punpcklbw %%mm7,%%mm2\n\t"
"movq %%mm3,%%mm5\n\t"
"punpckhbw %%mm7,%%mm4\n\t"
"psubw %%mm6,%%mm2\n\t"
"punpcklbw %%mm7,%%mm3\n\t"
"psubw %%mm6,%%mm4\n\t"
"punpckhbw %%mm7,%%mm5\n\t"
"psubw %%mm6,%%mm3\n\t"
"psubw %%mm6,%%mm5\n\t"
/*Write the answer out.*/
"movq %%mm2,0x20(%[residue])\n\t"
"movq %%mm4,0x28(%[residue])\n\t"
"movq %%mm3,0x30(%[residue])\n\t"
"movq %%mm5,0x38(%[residue])\n\t"
/*mm2=[src+6*ystride]*/
"movq (%[src],%[ystride],2),%%mm2\n\t"
/*mm3=[src+7*ystride]*/
"movq (%[src],%[ystride3]),%%mm3\n\t"
/*Compute [src+4*ystride]-128 and [src+5*ystride]-128*/
"movq %%mm0,%%mm4\n\t"
"punpcklbw %%mm7,%%mm0\n\t"
"movq %%mm1,%%mm5\n\t"
"punpckhbw %%mm7,%%mm4\n\t"
"psubw %%mm6,%%mm0\n\t"
"punpcklbw %%mm7,%%mm1\n\t"
"psubw %%mm6,%%mm4\n\t"
"punpckhbw %%mm7,%%mm5\n\t"
"psubw %%mm6,%%mm1\n\t"
"psubw %%mm6,%%mm5\n\t"
/*Write the answer out.*/
"movq %%mm0,0x40(%[residue])\n\t"
"movq %%mm4,0x48(%[residue])\n\t"
"movq %%mm1,0x50(%[residue])\n\t"
"movq %%mm5,0x58(%[residue])\n\t"
/*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/
"movq %%mm2,%%mm4\n\t"
"punpcklbw %%mm7,%%mm2\n\t"
"movq %%mm3,%%mm5\n\t"
"punpckhbw %%mm7,%%mm4\n\t"
"psubw %%mm6,%%mm2\n\t"
"punpcklbw %%mm7,%%mm3\n\t"
"psubw %%mm6,%%mm4\n\t"
"punpckhbw %%mm7,%%mm5\n\t"
"psubw %%mm6,%%mm3\n\t"
"psubw %%mm6,%%mm5\n\t"
/*Write the answer out.*/
"movq %%mm2,0x60(%[residue])\n\t"
"movq %%mm4,0x68(%[residue])\n\t"
"movq %%mm3,0x70(%[residue])\n\t"
"movq %%mm5,0x78(%[residue])\n\t"
/*[ystride3] is early-clobbered ("=&r") because it is written while the
inputs are still being read.*/
:[src]"+r"(_src),[ystride3]"=&r"(ystride3)
:[residue]"r"(_residue),[ystride]"r"((ptrdiff_t)_ystride)
:"memory"
);
}
/*Public entry point for averaging two blocks into a destination when all
  three share a single row stride.
  _dst:     The destination block.
  _src1:    The first source block.
  _src2:    The second source block.
  _ystride: The row stride used for _dst, _src1 and _src2.*/
void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
 const unsigned char *_src1,const unsigned char *_src2,int _ystride){
  /*The internal routine takes separate destination and source strides; pass
     the shared stride for both.*/
  oc_int_frag_copy2_mmxext(_dst,_ystride,
   _src1,_src2,_ystride);
}
#endif

View File

@ -0,0 +1,665 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************/
/*MMX fDCT implementation for x86_32*/
/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/
#include "x86enc.h"
#if defined(OC_X86_ASM)
/*Stage 1 of the forward DCT: four butterflies on the pairs (t0,t7), (t1,t6),
(t2,t5) and (t3,t4).
On input, mm0...mm7 hold t0...t7.
On output, mm0=t7', mm1=t6', mm2=t5', mm3=t4' (the differences) and mm7=t0',
mm6=t1', mm5=t2', mm4=t3' (the sums).
Each sum is rebuilt without a temporary: after a-=b, b+=b and then b+=a
leaves a_old+b_old in b.*/
# define OC_FDCT_STAGE1_8x4 \
"#OC_FDCT_STAGE1_8x4\n\t" \
/*Stage 1:*/ \
/*mm0=t7'=t0-t7*/ \
"psubw %%mm7,%%mm0\n\t" \
"paddw %%mm7,%%mm7\n\t" \
/*mm1=t6'=t1-t6*/ \
"psubw %%mm6,%%mm1\n\t" \
"paddw %%mm6,%%mm6\n\t" \
/*mm2=t5'=t2-t5*/ \
"psubw %%mm5,%%mm2\n\t" \
"paddw %%mm5,%%mm5\n\t" \
/*mm3=t4'=t3-t4*/ \
"psubw %%mm4,%%mm3\n\t" \
"paddw %%mm4,%%mm4\n\t" \
/*mm7=t0'=t0+t7*/ \
"paddw %%mm0,%%mm7\n\t" \
/*mm6=t1'=t1+t6*/ \
"paddw %%mm1,%%mm6\n\t" \
/*mm5=t2'=t2+t5*/ \
"paddw %%mm2,%%mm5\n\t" \
/*mm4=t3'=t3+t4*/ \
"paddw %%mm3,%%mm4\n\t" \
/*Performs stages 2 through 4 of the forward DCT.
On input, the registers hold the stage 1 outputs: mm0=t7', mm1=t6', mm2=t5',
mm3=t4', mm4=t3', mm5=t2', mm6=t1', mm7=t0'.
_r0..._r7 are string literals giving byte offsets from %[y]; the
corresponding slots are used both as scratch space for intermediate values
and to hold finished outputs.
%[a] is a general-purpose scratch register used to load constants.
On output, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7],
while _y[1] and _y[3] are left in the _r1 and _r3 slots.*/
# define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \
"#OC_FDCT8x4\n\t" \
/*Stage 2:*/ \
/*mm7=t3''=t0'-t3'*/ \
"psubw %%mm4,%%mm7\n\t" \
"paddw %%mm4,%%mm4\n\t" \
/*mm6=t2''=t1'-t2'*/ \
"psubw %%mm5,%%mm6\n\t" \
"movq %%mm7,"_r6"(%[y])\n\t" \
"paddw %%mm5,%%mm5\n\t" \
/*mm1=t5''=t6'-t5'*/ \
"psubw %%mm2,%%mm1\n\t" \
"movq %%mm6,"_r2"(%[y])\n\t" \
/*mm4=t0''=t0'+t3'*/ \
"paddw %%mm7,%%mm4\n\t" \
"paddw %%mm2,%%mm2\n\t" \
/*mm5=t1''=t1'+t2'*/ \
"movq %%mm4,"_r0"(%[y])\n\t" \
"paddw %%mm6,%%mm5\n\t" \
/*mm2=t6''=t6'+t5'*/ \
"paddw %%mm1,%%mm2\n\t" \
"movq %%mm5,"_r4"(%[y])\n\t" \
/*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \
/*mm4, mm5, mm6, mm7 are free.*/ \
/*Stage 3:*/ \
/*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \
"mov $0x5A806A0A,%[a]\n\t" \
"pcmpeqb %%mm6,%%mm6\n\t" \
"movd %[a],%%mm7\n\t" \
"psrlw $15,%%mm6\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"paddw %%mm6,%%mm6\n\t" \
/*mm0=0, mm2={-1}x4 \
mm5:mm4=t5''*27146+0xB500*/ \
"movq %%mm1,%%mm4\n\t" \
"movq %%mm1,%%mm5\n\t" \
"punpcklwd %%mm6,%%mm4\n\t" \
"movq %%mm2,"_r3"(%[y])\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"movq %%mm0,"_r7"(%[y])\n\t" \
"punpckhwd %%mm6,%%mm5\n\t" \
"pxor %%mm0,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"pcmpeqb %%mm2,%%mm2\n\t" \
/*mm2=t6'', mm1=t5''+(t5''!=0) \
mm4=(t5''*27146+0xB500>>16)*/ \
"pcmpeqw %%mm1,%%mm0\n\t" \
"psrad $16,%%mm4\n\t" \
"psubw %%mm2,%%mm0\n\t" \
"movq "_r3"(%[y]),%%mm2\n\t" \
"psrad $16,%%mm5\n\t" \
"paddw %%mm0,%%mm1\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
/*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \
"paddw %%mm1,%%mm4\n\t" \
"movq "_r7"(%[y]),%%mm0\n\t" \
"psraw $1,%%mm4\n\t" \
"movq %%mm3,%%mm1\n\t" \
/*mm3=t4''=t4'+s*/ \
"paddw %%mm4,%%mm3\n\t" \
/*mm1=t5'''=t4'-s*/ \
"psubw %%mm4,%%mm1\n\t" \
/*mm1=0, mm3={-1}x4 \
mm5:mm4=t6''*27146+0xB500*/ \
"movq %%mm2,%%mm4\n\t" \
"movq %%mm2,%%mm5\n\t" \
"punpcklwd %%mm6,%%mm4\n\t" \
"movq %%mm1,"_r5"(%[y])\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"movq %%mm3,"_r1"(%[y])\n\t" \
"punpckhwd %%mm6,%%mm5\n\t" \
"pxor %%mm1,%%mm1\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"pcmpeqb %%mm3,%%mm3\n\t" \
/*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \
"psrad $16,%%mm4\n\t" \
"pcmpeqw %%mm2,%%mm1\n\t" \
"psrad $16,%%mm5\n\t" \
"psubw %%mm3,%%mm1\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"paddw %%mm1,%%mm2\n\t" \
/*mm1=t1'' \
mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \
"paddw %%mm2,%%mm4\n\t" \
"movq "_r4"(%[y]),%%mm1\n\t" \
"psraw $1,%%mm4\n\t" \
"movq %%mm0,%%mm2\n\t" \
/*mm0=t7''=t7'+s*/ \
"paddw %%mm4,%%mm0\n\t" \
/*mm2=t6'''=t7'-s*/ \
"psubw %%mm4,%%mm2\n\t" \
/*Stage 4:*/ \
/*mm0=0, mm2=t0'' \
mm5:mm4=t1''*27146+0xB500*/ \
"movq %%mm1,%%mm4\n\t" \
"movq %%mm1,%%mm5\n\t" \
"punpcklwd %%mm6,%%mm4\n\t" \
"movq %%mm2,"_r3"(%[y])\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"movq "_r0"(%[y]),%%mm2\n\t" \
"punpckhwd %%mm6,%%mm5\n\t" \
"movq %%mm0,"_r7"(%[y])\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"pxor %%mm0,%%mm0\n\t" \
/*mm7={27146,0x4000>>1}x2 \
mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \
"psrad $16,%%mm4\n\t" \
"mov $0x20006A0A,%[a]\n\t" \
"pcmpeqw %%mm1,%%mm0\n\t" \
"movd %[a],%%mm7\n\t" \
"psrad $16,%%mm5\n\t" \
"psubw %%mm3,%%mm0\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"paddw %%mm1,%%mm0\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"paddw %%mm4,%%mm0\n\t" \
/*mm6={0x00000E3D}x2 \
mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \
"movq %%mm2,%%mm4\n\t" \
"movq %%mm2,%%mm5\n\t" \
"punpcklwd %%mm6,%%mm4\n\t" \
"mov $0x0E3D,%[a]\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"punpckhwd %%mm6,%%mm5\n\t" \
"movd %[a],%%mm6\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"pxor %%mm1,%%mm1\n\t" \
"punpckldq %%mm6,%%mm6\n\t" \
"pcmpeqw %%mm2,%%mm1\n\t" \
/*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \
"psrad $16,%%mm4\n\t" \
"psubw %%mm3,%%mm1\n\t" \
"psrad $16,%%mm5\n\t" \
"paddw %%mm1,%%mm2\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"movq "_r5"(%[y]),%%mm1\n\t" \
"paddw %%mm2,%%mm4\n\t" \
/*mm2=t6''', mm0=_y[0]=u=r+s>>1 \
The naive implementation could cause overflow, so we use \
u=(r&s)+((r^s)>>1).*/ \
"movq "_r3"(%[y]),%%mm2\n\t" \
"movq %%mm0,%%mm7\n\t" \
"pxor %%mm4,%%mm0\n\t" \
"pand %%mm4,%%mm7\n\t" \
"psraw $1,%%mm0\n\t" \
"mov $0x7FFF54DC,%[a]\n\t" \
"paddw %%mm7,%%mm0\n\t" \
"movd %[a],%%mm7\n\t" \
/*mm7={54491-0x7FFF,0x7FFF}x2 \
mm4=_y[4]=v=r-u*/ \
"psubw %%mm0,%%mm4\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"movq %%mm4,"_r4"(%[y])\n\t" \
/*mm0=0, mm7={36410}x4 \
mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \
"movq %%mm1,%%mm4\n\t" \
"movq %%mm1,%%mm5\n\t" \
"punpcklwd %%mm1,%%mm4\n\t" \
"mov $0x8E3A8E3A,%[a]\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"movq %%mm0,"_r0"(%[y])\n\t" \
"punpckhwd %%mm1,%%mm5\n\t" \
"pxor %%mm0,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"pcmpeqw %%mm0,%%mm1\n\t" \
"movd %[a],%%mm7\n\t" \
"psubw %%mm3,%%mm1\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"paddd %%mm6,%%mm4\n\t" \
"paddd %%mm6,%%mm5\n\t" \
/*mm0=0 \
mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \
"movq %%mm2,%%mm6\n\t" \
"movq %%mm2,%%mm3\n\t" \
"pmulhw %%mm7,%%mm6\n\t" \
"paddw %%mm2,%%mm1\n\t" \
"pmullw %%mm7,%%mm3\n\t" \
"pxor %%mm0,%%mm0\n\t" \
"paddw %%mm1,%%mm6\n\t" \
"movq %%mm3,%%mm1\n\t" \
"punpckhwd %%mm6,%%mm3\n\t" \
"punpcklwd %%mm6,%%mm1\n\t" \
/*mm3={-1}x4, mm6={1}x4 \
mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \
"paddd %%mm3,%%mm5\n\t" \
"paddd %%mm1,%%mm4\n\t" \
"psrad $16,%%mm5\n\t" \
"pxor %%mm6,%%mm6\n\t" \
"psrad $16,%%mm4\n\t" \
"pcmpeqb %%mm3,%%mm3\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"psubw %%mm3,%%mm6\n\t" \
/*mm1=t7'', mm7={26568,0x3400}x2 \
mm2=s=t6'''-(36410*u>>16)*/ \
"movq %%mm4,%%mm1\n\t" \
"mov $0x340067C8,%[a]\n\t" \
"pmulhw %%mm7,%%mm4\n\t" \
"movd %[a],%%mm7\n\t" \
"movq %%mm1,"_r5"(%[y])\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"paddw %%mm1,%%mm4\n\t" \
"movq "_r7"(%[y]),%%mm1\n\t" \
"psubw %%mm4,%%mm2\n\t" \
/*mm6={0x00007B1B}x2 \
mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \
"movq %%mm2,%%mm4\n\t" \
"movq %%mm2,%%mm5\n\t" \
"punpcklwd %%mm6,%%mm4\n\t" \
"pcmpeqw %%mm2,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"mov $0x7B1B,%[a]\n\t" \
"punpckhwd %%mm6,%%mm5\n\t" \
"movd %[a],%%mm6\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"psubw %%mm3,%%mm0\n\t" \
"punpckldq %%mm6,%%mm6\n\t" \
/*mm7={64277-0x7FFF,0x7FFF}x2 \
mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \
"psrad $17,%%mm4\n\t" \
"paddw %%mm0,%%mm2\n\t" \
"psrad $17,%%mm5\n\t" \
"mov $0x7FFF7B16,%[a]\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"movd %[a],%%mm7\n\t" \
"paddw %%mm4,%%mm2\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
/*mm0=0, mm7={12785}x4 \
mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \
"movq %%mm1,%%mm4\n\t" \
"movq %%mm1,%%mm5\n\t" \
"movq %%mm2,"_r3"(%[y])\n\t" \
"punpcklwd %%mm1,%%mm4\n\t" \
"movq "_r1"(%[y]),%%mm2\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"mov $0x31F131F1,%[a]\n\t" \
"punpckhwd %%mm1,%%mm5\n\t" \
"pxor %%mm0,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"pcmpeqw %%mm0,%%mm1\n\t" \
"movd %[a],%%mm7\n\t" \
"psubw %%mm3,%%mm1\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"paddd %%mm6,%%mm4\n\t" \
"paddd %%mm6,%%mm5\n\t" \
/*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \
"movq %%mm2,%%mm6\n\t" \
"movq %%mm2,%%mm3\n\t" \
"pmulhw %%mm7,%%mm6\n\t" \
"pmullw %%mm7,%%mm3\n\t" \
"paddw %%mm1,%%mm6\n\t" \
"movq %%mm3,%%mm1\n\t" \
"punpckhwd %%mm6,%%mm3\n\t" \
"punpcklwd %%mm6,%%mm1\n\t" \
/*mm3={-1}x4, mm6={1}x4 \
mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \
"paddd %%mm3,%%mm5\n\t" \
"paddd %%mm1,%%mm4\n\t" \
"psrad $16,%%mm5\n\t" \
"pxor %%mm6,%%mm6\n\t" \
"psrad $16,%%mm4\n\t" \
"pcmpeqb %%mm3,%%mm3\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"psubw %%mm3,%%mm6\n\t" \
/*mm1=t3'', mm7={20539,0x3000}x2 \
mm4=s=(12785*u>>16)-t4''*/ \
"movq %%mm4,"_r1"(%[y])\n\t" \
"pmulhw %%mm7,%%mm4\n\t" \
"mov $0x3000503B,%[a]\n\t" \
"movq "_r6"(%[y]),%%mm1\n\t" \
"movd %[a],%%mm7\n\t" \
"psubw %%mm2,%%mm4\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
/*mm6={0x00006CB7}x2 \
mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \
"movq %%mm4,%%mm5\n\t" \
"movq %%mm4,%%mm2\n\t" \
"punpcklwd %%mm6,%%mm4\n\t" \
"pcmpeqw %%mm2,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"mov $0x6CB7,%[a]\n\t" \
"punpckhwd %%mm6,%%mm5\n\t" \
"movd %[a],%%mm6\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"psubw %%mm3,%%mm0\n\t" \
"punpckldq %%mm6,%%mm6\n\t" \
/*mm7={60547-0x7FFF,0x7FFF}x2 \
mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \
"psrad $20,%%mm4\n\t" \
"paddw %%mm0,%%mm2\n\t" \
"psrad $20,%%mm5\n\t" \
"mov $0x7FFF6C84,%[a]\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
"movd %[a],%%mm7\n\t" \
"paddw %%mm4,%%mm2\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
/*mm0=0, mm7={25080}x4 \
mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \
"movq %%mm1,%%mm4\n\t" \
"movq %%mm1,%%mm5\n\t" \
"movq %%mm2,"_r7"(%[y])\n\t" \
"punpcklwd %%mm1,%%mm4\n\t" \
"movq "_r2"(%[y]),%%mm2\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"mov $0x61F861F8,%[a]\n\t" \
"punpckhwd %%mm1,%%mm5\n\t" \
"pxor %%mm0,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm5\n\t" \
"movd %[a],%%mm7\n\t" \
"pcmpeqw %%mm0,%%mm1\n\t" \
"psubw %%mm3,%%mm1\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"paddd %%mm6,%%mm4\n\t" \
"paddd %%mm6,%%mm5\n\t" \
/*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \
"movq %%mm2,%%mm6\n\t" \
"movq %%mm2,%%mm3\n\t" \
"pmulhw %%mm7,%%mm6\n\t" \
"pmullw %%mm7,%%mm3\n\t" \
"paddw %%mm1,%%mm6\n\t" \
"movq %%mm3,%%mm1\n\t" \
"punpckhwd %%mm6,%%mm3\n\t" \
"punpcklwd %%mm6,%%mm1\n\t" \
/*mm1={-1}x4 \
mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \
"paddd %%mm3,%%mm5\n\t" \
"paddd %%mm1,%%mm4\n\t" \
"psrad $16,%%mm5\n\t" \
"mov $0x28005460,%[a]\n\t" \
"psrad $16,%%mm4\n\t" \
"pcmpeqb %%mm1,%%mm1\n\t" \
"packssdw %%mm5,%%mm4\n\t" \
/*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \
mm4=s=(25080*u>>16)-t2''*/ \
"movq %%mm4,%%mm6\n\t" \
"pmulhw %%mm7,%%mm4\n\t" \
"pxor %%mm5,%%mm5\n\t" \
"movd %[a],%%mm7\n\t" \
"psubw %%mm1,%%mm5\n\t" \
"punpckldq %%mm7,%%mm7\n\t" \
"psubw %%mm2,%%mm4\n\t" \
/*mm2=s+(s!=0) \
mm4:mm3=s*21600+0x2800*/ \
"movq %%mm4,%%mm3\n\t" \
"movq %%mm4,%%mm2\n\t" \
"punpckhwd %%mm5,%%mm4\n\t" \
"pcmpeqw %%mm2,%%mm0\n\t" \
"pmaddwd %%mm7,%%mm4\n\t" \
"psubw %%mm1,%%mm0\n\t" \
"punpcklwd %%mm5,%%mm3\n\t" \
"paddw %%mm0,%%mm2\n\t" \
"pmaddwd %%mm7,%%mm3\n\t" \
/*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \
mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \
"movq "_r4"(%[y]),%%mm0\n\t" \
"psrad $18,%%mm4\n\t" \
"movq "_r5"(%[y]),%%mm5\n\t" \
"psrad $18,%%mm3\n\t" \
"movq "_r7"(%[y]),%%mm1\n\t" \
"packssdw %%mm4,%%mm3\n\t" \
"movq "_r0"(%[y]),%%mm4\n\t" \
"paddw %%mm2,%%mm3\n\t" \
/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7].
On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and
{mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/
# define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \
"#OC_TRANSPOSE8x4\n\t" \
/*First 4x4 transpose:*/ \
/*mm0 = e3 e2 e1 e0 \
mm5 = f3 f2 f1 f0 \
mm3 = g3 g2 g1 g0 \
mm1 = h3 h2 h1 h0*/ \
"movq %%mm0,%%mm2\n\t" \
"punpcklwd %%mm5,%%mm0\n\t" \
"punpckhwd %%mm5,%%mm2\n\t" \
"movq %%mm3,%%mm5\n\t" \
"punpcklwd %%mm1,%%mm3\n\t" \
"punpckhwd %%mm1,%%mm5\n\t" \
/*mm0 = f1 e1 f0 e0 \
mm2 = f3 e3 f2 e2 \
mm3 = h1 g1 h0 g0 \
mm5 = h3 g3 h2 g2*/ \
"movq %%mm0,%%mm1\n\t" \
"punpckldq %%mm3,%%mm0\n\t" \
"movq %%mm0,"_r4"(%[y])\n\t" \
"punpckhdq %%mm3,%%mm1\n\t" \
"movq "_r1"(%[y]),%%mm0\n\t" \
"movq %%mm2,%%mm3\n\t" \
"punpckldq %%mm5,%%mm2\n\t" \
"punpckhdq %%mm5,%%mm3\n\t" \
"movq "_r3"(%[y]),%%mm5\n\t" \
/*_y[4] = h0 g0 f0 e0 \
mm1 = h1 g1 f1 e1 \
mm2 = h2 g2 f2 e2 \
mm3 = h3 g3 f3 e3*/ \
/*Second 4x4 transpose:*/ \
/*mm4 = a3 a2 a1 a0 \
mm0 = b3 b2 b1 b0 \
mm6 = c3 c2 c1 c0 \
mm5 = d3 d2 d1 d0*/ \
"movq %%mm4,%%mm7\n\t" \
"punpcklwd %%mm0,%%mm4\n\t" \
"punpckhwd %%mm0,%%mm7\n\t" \
"movq %%mm6,%%mm0\n\t" \
"punpcklwd %%mm5,%%mm6\n\t" \
"punpckhwd %%mm5,%%mm0\n\t" \
/*mm4 = b1 a1 b0 a0 \
mm7 = b3 a3 b2 a2 \
mm6 = d1 c1 d0 c0 \
mm0 = d3 c3 d2 c2*/ \
"movq %%mm4,%%mm5\n\t" \
"punpckldq %%mm6,%%mm4\n\t" \
"punpckhdq %%mm6,%%mm5\n\t" \
"movq %%mm7,%%mm6\n\t" \
"punpckhdq %%mm0,%%mm7\n\t" \
"punpckldq %%mm0,%%mm6\n\t" \
/*mm4 = d0 c0 b0 a0 \
mm5 = d1 c1 b1 a1 \
mm6 = d2 c2 b2 a2 \
mm7 = d3 c3 b3 a3*/ \
/*MMX implementation of the fDCT.
_y: Receives the 64 output coefficients.
It is also used as scratch storage for intermediate results between passes.
_x: The 64 input values; only read.
The inputs are shifted left by 2 bits as they are loaded, and the final results
are rounded (add 2) and shifted right by 2 bits before being stored.
OC_FDCT8x4 followed by OC_TRANSPOSE8x4 is applied four times: twice to data
loaded from _x, and twice to the intermediate results held in _y and in
registers.
No emms instruction appears in this function, so the MMX state is still live
when it returns.*/
void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
/*Scratch general-purpose register, used only to build small constants that are
moved into MMX registers for the bias terms.*/
ptrdiff_t a;
__asm__ __volatile__(
/*Add two extra bits of working precision to improve accuracy; any more and
we could overflow.*/
/*We also add biases to correct for some systematic error that remains in
the full fDCT->iDCT round trip.*/
"movq 0x00(%[x]),%%mm0\n\t"
"movq 0x10(%[x]),%%mm1\n\t"
"movq 0x20(%[x]),%%mm2\n\t"
"movq 0x30(%[x]),%%mm3\n\t"
"pcmpeqb %%mm4,%%mm4\n\t"
"pxor %%mm7,%%mm7\n\t"
"movq %%mm0,%%mm5\n\t"
"psllw $2,%%mm0\n\t"
"pcmpeqw %%mm7,%%mm5\n\t"
"movq 0x70(%[x]),%%mm7\n\t"
"psllw $2,%%mm1\n\t"
"psubw %%mm4,%%mm5\n\t"
"psllw $2,%%mm2\n\t"
"mov $1,%[a]\n\t"
"pslld $16,%%mm5\n\t"
"movd %[a],%%mm6\n\t"
"psllq $16,%%mm5\n\t"
"mov $0x10001,%[a]\n\t"
"psllw $2,%%mm3\n\t"
"movd %[a],%%mm4\n\t"
"punpckhwd %%mm6,%%mm5\n\t"
"psubw %%mm6,%%mm1\n\t"
"movq 0x60(%[x]),%%mm6\n\t"
"paddw %%mm5,%%mm0\n\t"
"movq 0x50(%[x]),%%mm5\n\t"
"paddw %%mm4,%%mm0\n\t"
"movq 0x40(%[x]),%%mm4\n\t"
/*We inline stage1 of the transform here so we can get better instruction
scheduling with the shifts.*/
/*mm0=t7'=t0-t7*/
"psllw $2,%%mm7\n\t"
"psubw %%mm7,%%mm0\n\t"
"psllw $2,%%mm6\n\t"
"paddw %%mm7,%%mm7\n\t"
/*mm1=t6'=t1-t6*/
"psllw $2,%%mm5\n\t"
"psubw %%mm6,%%mm1\n\t"
"psllw $2,%%mm4\n\t"
"paddw %%mm6,%%mm6\n\t"
/*mm2=t5'=t2-t5*/
"psubw %%mm5,%%mm2\n\t"
"paddw %%mm5,%%mm5\n\t"
/*mm3=t4'=t3-t4*/
"psubw %%mm4,%%mm3\n\t"
"paddw %%mm4,%%mm4\n\t"
/*mm7=t0'=t0+t7*/
"paddw %%mm0,%%mm7\n\t"
/*mm6=t1'=t1+t6*/
"paddw %%mm1,%%mm6\n\t"
/*mm5=t2'=t2+t5*/
"paddw %%mm2,%%mm5\n\t"
/*mm4=t3'=t3+t4*/
"paddw %%mm3,%%mm4\n\t"
OC_FDCT8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70")
OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70")
/*Swap out this 8x4 block for the next one.*/
"movq 0x08(%[x]),%%mm0\n\t"
"movq %%mm7,0x30(%[y])\n\t"
"movq 0x78(%[x]),%%mm7\n\t"
"movq %%mm1,0x50(%[y])\n\t"
"movq 0x18(%[x]),%%mm1\n\t"
"movq %%mm6,0x20(%[y])\n\t"
"movq 0x68(%[x]),%%mm6\n\t"
"movq %%mm2,0x60(%[y])\n\t"
"movq 0x28(%[x]),%%mm2\n\t"
"movq %%mm5,0x10(%[y])\n\t"
"movq 0x58(%[x]),%%mm5\n\t"
"movq %%mm3,0x70(%[y])\n\t"
"movq 0x38(%[x]),%%mm3\n\t"
/*And increase its working precision, too.*/
"psllw $2,%%mm0\n\t"
"movq %%mm4,0x00(%[y])\n\t"
"psllw $2,%%mm7\n\t"
"movq 0x48(%[x]),%%mm4\n\t"
/*We inline stage1 of the transform here so we can get better instruction
scheduling with the shifts.*/
/*mm0=t7'=t0-t7*/
"psubw %%mm7,%%mm0\n\t"
"psllw $2,%%mm1\n\t"
"paddw %%mm7,%%mm7\n\t"
"psllw $2,%%mm6\n\t"
/*mm1=t6'=t1-t6*/
"psubw %%mm6,%%mm1\n\t"
"psllw $2,%%mm2\n\t"
"paddw %%mm6,%%mm6\n\t"
"psllw $2,%%mm5\n\t"
/*mm2=t5'=t2-t5*/
"psubw %%mm5,%%mm2\n\t"
"psllw $2,%%mm3\n\t"
"paddw %%mm5,%%mm5\n\t"
"psllw $2,%%mm4\n\t"
/*mm3=t4'=t3-t4*/
"psubw %%mm4,%%mm3\n\t"
"paddw %%mm4,%%mm4\n\t"
/*mm7=t0'=t0+t7*/
"paddw %%mm0,%%mm7\n\t"
/*mm6=t1'=t1+t6*/
"paddw %%mm1,%%mm6\n\t"
/*mm5=t2'=t2+t5*/
"paddw %%mm2,%%mm5\n\t"
/*mm4=t3'=t3+t4*/
"paddw %%mm3,%%mm4\n\t"
OC_FDCT8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78")
OC_TRANSPOSE8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78")
/*Here the first 4x4 block of output from the last transpose is the second
4x4 block of input for the next transform.
We have cleverly arranged that it already be in the appropriate place,
so we only have to do half the stores and loads.*/
"movq 0x00(%[y]),%%mm0\n\t"
"movq %%mm1,0x58(%[y])\n\t"
"movq 0x10(%[y]),%%mm1\n\t"
"movq %%mm2,0x68(%[y])\n\t"
"movq 0x20(%[y]),%%mm2\n\t"
"movq %%mm3,0x78(%[y])\n\t"
"movq 0x30(%[y]),%%mm3\n\t"
OC_FDCT_STAGE1_8x4
OC_FDCT8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38")
OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38")
/*mm0={-2}x4*/
"pcmpeqw %%mm0,%%mm0\n\t"
"paddw %%mm0,%%mm0\n\t"
/*Round the results.*/
/*Subtracting -2 adds the rounding offset of 2 before each shift right by 2.*/
"psubw %%mm0,%%mm1\n\t"
"psubw %%mm0,%%mm2\n\t"
"psraw $2,%%mm1\n\t"
"psubw %%mm0,%%mm3\n\t"
"movq %%mm1,0x18(%[y])\n\t"
"psraw $2,%%mm2\n\t"
"psubw %%mm0,%%mm4\n\t"
"movq 0x08(%[y]),%%mm1\n\t"
"psraw $2,%%mm3\n\t"
"psubw %%mm0,%%mm5\n\t"
"psraw $2,%%mm4\n\t"
"psubw %%mm0,%%mm6\n\t"
"psraw $2,%%mm5\n\t"
"psubw %%mm0,%%mm7\n\t"
"psraw $2,%%mm6\n\t"
"psubw %%mm0,%%mm1\n\t"
"psraw $2,%%mm7\n\t"
"movq 0x40(%[y]),%%mm0\n\t"
"psraw $2,%%mm1\n\t"
"movq %%mm7,0x30(%[y])\n\t"
"movq 0x78(%[y]),%%mm7\n\t"
"movq %%mm1,0x08(%[y])\n\t"
"movq 0x50(%[y]),%%mm1\n\t"
"movq %%mm6,0x20(%[y])\n\t"
"movq 0x68(%[y]),%%mm6\n\t"
"movq %%mm2,0x28(%[y])\n\t"
"movq 0x60(%[y]),%%mm2\n\t"
"movq %%mm5,0x10(%[y])\n\t"
"movq 0x58(%[y]),%%mm5\n\t"
"movq %%mm3,0x38(%[y])\n\t"
"movq 0x70(%[y]),%%mm3\n\t"
"movq %%mm4,0x00(%[y])\n\t"
"movq 0x48(%[y]),%%mm4\n\t"
OC_FDCT_STAGE1_8x4
OC_FDCT8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78")
OC_TRANSPOSE8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78")
/*mm0={-2}x4*/
"pcmpeqw %%mm0,%%mm0\n\t"
"paddw %%mm0,%%mm0\n\t"
/*Round the results.*/
"psubw %%mm0,%%mm1\n\t"
"psubw %%mm0,%%mm2\n\t"
"psraw $2,%%mm1\n\t"
"psubw %%mm0,%%mm3\n\t"
"movq %%mm1,0x58(%[y])\n\t"
"psraw $2,%%mm2\n\t"
"psubw %%mm0,%%mm4\n\t"
"movq 0x48(%[y]),%%mm1\n\t"
"psraw $2,%%mm3\n\t"
"psubw %%mm0,%%mm5\n\t"
"movq %%mm2,0x68(%[y])\n\t"
"psraw $2,%%mm4\n\t"
"psubw %%mm0,%%mm6\n\t"
"movq %%mm3,0x78(%[y])\n\t"
"psraw $2,%%mm5\n\t"
"psubw %%mm0,%%mm7\n\t"
"movq %%mm4,0x40(%[y])\n\t"
"psraw $2,%%mm6\n\t"
"psubw %%mm0,%%mm1\n\t"
"movq %%mm5,0x50(%[y])\n\t"
"psraw $2,%%mm7\n\t"
"movq %%mm6,0x60(%[y])\n\t"
"psraw $2,%%mm1\n\t"
"movq %%mm7,0x70(%[y])\n\t"
"movq %%mm1,0x48(%[y])\n\t"
/*a is an early-clobber scratch output; _y and _x are passed as plain pointer
inputs, so the "memory" clobber is what tells the compiler _y is written.*/
:[a]"=&r"(a)
:[y]"r"(_y),[x]"r"(_x)
:"memory"
);
}
#endif

View File

@ -0,0 +1,293 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxfrag.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
/*MMX acceleration of fragment reconstruction for motion compensation.
Originally written by Rudolf Marek.
Additional optimization by Nils Pipenbrinck.
Note: Loops are unrolled for best performance.
The iteration each instruction belongs to is marked in the comments as #i.*/
#include <stddef.h>
#include "x86int.h"
#include "mmxfrag.h"
#if defined(OC_X86_ASM)
/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
between rows.
This is a function wrapper around the OC_FRAG_COPY_MMX macro, giving the copy
an address that can be called through a function pointer.*/
void oc_frag_copy_mmx(unsigned char *_dst,
const unsigned char *_src,int _ystride){
OC_FRAG_COPY_MMX(_dst,_src,_ystride);
}
/*Reconstructs an 8x8 intra fragment from its residue.
Each 16-bit residue value has 128 added to it with signed saturation and is
then packed to an unsigned byte with saturation; the eight resulting rows of
eight bytes are stored to _dst, _ystride bytes apart.
_dst: The destination of the top-left pixel.
_ystride: The offset in bytes between rows of _dst (may be negative).
_residue: The 64 residue values, read as 16 quadwords.*/
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
const ogg_int16_t *_residue){
__asm__ __volatile__(
/*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
"pcmpeqw %%mm0,%%mm0\n\t"
/*#0 Load low residue.*/
"movq 0*8(%[residue]),%%mm1\n\t"
/*#0 Load high residue.*/
"movq 1*8(%[residue]),%%mm2\n\t"
/*Set mm0 to 0x8000800080008000.*/
"psllw $15,%%mm0\n\t"
/*#1 Load low residue.*/
"movq 2*8(%[residue]),%%mm3\n\t"
/*#1 Load high residue.*/
"movq 3*8(%[residue]),%%mm4\n\t"
/*Set mm0 to 0x0080008000800080.*/
"psrlw $8,%%mm0\n\t"
/*#2 Load low residue.*/
"movq 4*8(%[residue]),%%mm5\n\t"
/*#2 Load high residue.*/
"movq 5*8(%[residue]),%%mm6\n\t"
/*#0 Bias low residue.*/
"paddsw %%mm0,%%mm1\n\t"
/*#0 Bias high residue.*/
"paddsw %%mm0,%%mm2\n\t"
/*#0 Pack to byte.*/
"packuswb %%mm2,%%mm1\n\t"
/*#1 Bias low residue.*/
"paddsw %%mm0,%%mm3\n\t"
/*#1 Bias high residue.*/
"paddsw %%mm0,%%mm4\n\t"
/*#1 Pack to byte.*/
"packuswb %%mm4,%%mm3\n\t"
/*#2 Bias low residue.*/
"paddsw %%mm0,%%mm5\n\t"
/*#2 Bias high residue.*/
"paddsw %%mm0,%%mm6\n\t"
/*#2 Pack to byte.*/
"packuswb %%mm6,%%mm5\n\t"
/*#0 Write row.*/
"movq %%mm1,(%[dst])\n\t"
/*#1 Write row.*/
"movq %%mm3,(%[dst],%[ystride])\n\t"
/*#2 Write row.*/
"movq %%mm5,(%[dst],%[ystride],2)\n\t"
/*#3 Load low residue.*/
"movq 6*8(%[residue]),%%mm1\n\t"
/*#3 Load high residue.*/
"movq 7*8(%[residue]),%%mm2\n\t"
/*#4 Load low residue.*/
"movq 8*8(%[residue]),%%mm3\n\t"
/*#4 Load high residue.*/
"movq 9*8(%[residue]),%%mm4\n\t"
/*#5 Load low residue.*/
"movq 10*8(%[residue]),%%mm5\n\t"
/*#5 Load high residue.*/
"movq 11*8(%[residue]),%%mm6\n\t"
/*#3 Bias low residue.*/
"paddsw %%mm0,%%mm1\n\t"
/*#3 Bias high residue.*/
"paddsw %%mm0,%%mm2\n\t"
/*#3 Pack to byte.*/
"packuswb %%mm2,%%mm1\n\t"
/*#4 Bias low residue.*/
"paddsw %%mm0,%%mm3\n\t"
/*#4 Bias high residue.*/
"paddsw %%mm0,%%mm4\n\t"
/*#4 Pack to byte.*/
"packuswb %%mm4,%%mm3\n\t"
/*#5 Bias low residue.*/
"paddsw %%mm0,%%mm5\n\t"
/*#5 Bias high residue.*/
"paddsw %%mm0,%%mm6\n\t"
/*#5 Pack to byte.*/
"packuswb %%mm6,%%mm5\n\t"
/*#3 Write row.*/
"movq %%mm1,(%[dst],%[ystride3])\n\t"
/*#4 Write row.*/
"movq %%mm3,(%[dst4])\n\t"
/*#5 Write row.*/
"movq %%mm5,(%[dst4],%[ystride])\n\t"
/*#6 Load low residue.*/
"movq 12*8(%[residue]),%%mm1\n\t"
/*#6 Load high residue.*/
"movq 13*8(%[residue]),%%mm2\n\t"
/*#7 Load low residue.*/
"movq 14*8(%[residue]),%%mm3\n\t"
/*#7 Load high residue.*/
"movq 15*8(%[residue]),%%mm4\n\t"
/*#6 Bias low residue.*/
"paddsw %%mm0,%%mm1\n\t"
/*#6 Bias high residue.*/
"paddsw %%mm0,%%mm2\n\t"
/*#6 Pack to byte.*/
"packuswb %%mm2,%%mm1\n\t"
/*#7 Bias low residue.*/
"paddsw %%mm0,%%mm3\n\t"
/*#7 Bias high residue.*/
"paddsw %%mm0,%%mm4\n\t"
/*#7 Pack to byte.*/
"packuswb %%mm4,%%mm3\n\t"
/*#6 Write row.*/
"movq %%mm1,(%[dst4],%[ystride],2)\n\t"
/*#7 Write row.*/
"movq %%mm3,(%[dst4],%[ystride3])\n\t"
:
:[residue]"r"(_residue),
[dst]"r"(_dst),
/*Multiply instead of shifting: left-shifting a negative int is undefined
behavior in C, and _ystride is a signed quantity.*/
[dst4]"r"(_dst+(ptrdiff_t)_ystride*4),
[ystride]"r"((ptrdiff_t)_ystride),
[ystride3]"r"((ptrdiff_t)_ystride*3)
:"memory"
);
}
/*Reconstructs an 8x8 inter fragment from one predictor and a residue.
Each source byte is zero-extended to 16 bits, the matching residue value is
added with signed saturation, and the result is packed back to an unsigned
byte with saturation and stored to _dst.
_dst: The destination of the top-left pixel.
_src: The predictor block; rows are _ystride bytes apart, like _dst.
_ystride: The offset in bytes between rows.
_residue: The 64 residue values.
The loop runs 4 times and handles two rows per iteration.*/
void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src,
int _ystride,const ogg_int16_t *_residue){
int i;
/*Zero mm0.*/
/*This is a separate asm statement: the loop body below relies on mm0 still
being zero on every iteration and never writes it.*/
__asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::);
for(i=4;i-->0;){
__asm__ __volatile__(
/*#0 Load source.*/
"movq (%[src]),%%mm3\n\t"
/*#1 Load source.*/
"movq (%[src],%[ystride]),%%mm7\n\t"
/*#0 Get copy of src.*/
"movq %%mm3,%%mm4\n\t"
/*#0 Expand high source.*/
"punpckhbw %%mm0,%%mm4\n\t"
/*#0 Expand low source.*/
"punpcklbw %%mm0,%%mm3\n\t"
/*#0 Add residue high.*/
"paddsw 8(%[residue]),%%mm4\n\t"
/*#1 Get copy of src.*/
"movq %%mm7,%%mm2\n\t"
/*#0 Add residue low.*/
"paddsw (%[residue]), %%mm3\n\t"
/*#1 Expand high source.*/
"punpckhbw %%mm0,%%mm2\n\t"
/*#0 Pack final row pixels.*/
"packuswb %%mm4,%%mm3\n\t"
/*#1 Expand low source.*/
"punpcklbw %%mm0,%%mm7\n\t"
/*#1 Add residue low.*/
"paddsw 16(%[residue]),%%mm7\n\t"
/*#1 Add residue high.*/
"paddsw 24(%[residue]),%%mm2\n\t"
/*Advance residue.*/
"lea 32(%[residue]),%[residue]\n\t"
/*#1 Pack final row pixels.*/
"packuswb %%mm2,%%mm7\n\t"
/*Advance src.*/
"lea (%[src],%[ystride],2),%[src]\n\t"
/*#0 Write row.*/
"movq %%mm3,(%[dst])\n\t"
/*#1 Write row.*/
"movq %%mm7,(%[dst],%[ystride])\n\t"
/*Advance dst.*/
"lea (%[dst],%[ystride],2),%[dst]\n\t"
/*The three pointers are read-write operands because the asm advances them in
place; the updated values carry over to the next loop iteration.*/
:[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
:[ystride]"r"((ptrdiff_t)_ystride)
:"memory"
);
}
}
/*Reconstructs an 8x8 inter fragment from two predictors and a residue.
The two source bytes are zero-extended to 16 bits, summed and shifted right by
one, the matching residue value is added with signed saturation, and the
result is packed to an unsigned byte with saturation and stored to _dst.
_dst: The destination of the top-left pixel.
_src1: The first predictor block.
_src2: The second predictor block.
_ystride: The offset in bytes between rows of _dst, _src1 and _src2.
_residue: The 64 residue values.
The loop runs 4 times and handles two rows per iteration.*/
void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){
int i;
/*Zero mm7.*/
/*This is a separate asm statement: the loop body below relies on mm7 still
being zero on every iteration and never writes it.*/
__asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::);
for(i=4;i-->0;){
__asm__ __volatile__(
/*#0 Load src1.*/
"movq (%[src1]),%%mm0\n\t"
/*#0 Load src2.*/
"movq (%[src2]),%%mm2\n\t"
/*#0 Copy src1.*/
"movq %%mm0,%%mm1\n\t"
/*#0 Copy src2.*/
"movq %%mm2,%%mm3\n\t"
/*#1 Load src1.*/
"movq (%[src1],%[ystride]),%%mm4\n\t"
/*#0 Unpack lower src1.*/
"punpcklbw %%mm7,%%mm0\n\t"
/*#1 Load src2.*/
"movq (%[src2],%[ystride]),%%mm5\n\t"
/*#0 Unpack higher src1.*/
"punpckhbw %%mm7,%%mm1\n\t"
/*#0 Unpack lower src2.*/
"punpcklbw %%mm7,%%mm2\n\t"
/*#0 Unpack higher src2.*/
"punpckhbw %%mm7,%%mm3\n\t"
/*Advance src1 ptr.*/
"lea (%[src1],%[ystride],2),%[src1]\n\t"
/*Advance src2 ptr.*/
"lea (%[src2],%[ystride],2),%[src2]\n\t"
/*#0 Lower src1+src2.*/
"paddsw %%mm2,%%mm0\n\t"
/*#0 Higher src1+src2.*/
"paddsw %%mm3,%%mm1\n\t"
/*#1 Copy src1.*/
"movq %%mm4,%%mm2\n\t"
/*#0 Build lo average.*/
"psraw $1,%%mm0\n\t"
/*#1 Copy src2.*/
"movq %%mm5,%%mm3\n\t"
/*#1 Unpack lower src1.*/
"punpcklbw %%mm7,%%mm4\n\t"
/*#0 Build hi average.*/
"psraw $1,%%mm1\n\t"
/*#1 Unpack higher src1.*/
"punpckhbw %%mm7,%%mm2\n\t"
/*#0 low+=residue.*/
"paddsw (%[residue]),%%mm0\n\t"
/*#1 Unpack lower src2.*/
"punpcklbw %%mm7,%%mm5\n\t"
/*#0 high+=residue.*/
"paddsw 8(%[residue]),%%mm1\n\t"
/*#1 Unpack higher src2.*/
"punpckhbw %%mm7,%%mm3\n\t"
/*#1 Lower src1+src2.*/
"paddsw %%mm4,%%mm5\n\t"
/*#0 Pack and saturate.*/
"packuswb %%mm1,%%mm0\n\t"
/*#1 Higher src1+src2.*/
"paddsw %%mm2,%%mm3\n\t"
/*#0 Write row.*/
"movq %%mm0,(%[dst])\n\t"
/*#1 Build lo average.*/
"psraw $1,%%mm5\n\t"
/*#1 Build hi average.*/
"psraw $1,%%mm3\n\t"
/*#1 low+=residue.*/
"paddsw 16(%[residue]),%%mm5\n\t"
/*#1 high+=residue.*/
"paddsw 24(%[residue]),%%mm3\n\t"
/*#1 Pack and saturate.*/
"packuswb %%mm3,%%mm5\n\t"
/*#1 Write row ptr.*/
"movq %%mm5,(%[dst],%[ystride])\n\t"
/*Advance residue ptr.*/
"add $32,%[residue]\n\t"
/*Advance dest ptr.*/
"lea (%[dst],%[ystride],2),%[dst]\n\t"
/*All four pointers are read-write operands because the asm advances them in
place.
NOTE(review): the '%' (commutative) modifier on src1 is combined with '+';
GCC's constraint documentation should be checked to confirm this is
supported usage.*/
:[dst]"+r"(_dst),[residue]"+r"(_residue),
[src1]"+%r"(_src1),[src2]"+r"(_src2)
:[ystride]"r"((ptrdiff_t)_ystride)
:"memory"
);
}
}
/*Executes the emms instruction to end a run of MMX code.
None of the other MMX routines in this file issue emms themselves, so
presumably callers invoke this before any x87 floating-point code runs.*/
void oc_restore_fpu_mmx(void){
__asm__ __volatile__("emms\n\t");
}
#endif

View File

@ -0,0 +1,64 @@
#if !defined(_x86_mmxfrag_H)
# define _x86_mmxfrag_H (1)
# include <stddef.h>
# include "x86int.h"
#if defined(OC_X86_ASM)
/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
between rows.
The block is moved as two groups of four 8-byte rows, using mm0...mm3.
_src and _dst are each evaluated once and copied into locals, which the asm
then advances by 4*_ystride between the two groups.
ystride3 is an early-clobber scratch register that the asm itself fills with
3*_ystride.
No emms instruction is issued by this macro.*/
#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \
do{ \
const unsigned char *src; \
unsigned char *dst; \
ptrdiff_t ystride3; \
src=(_src); \
dst=(_dst); \
__asm__ __volatile__( \
/*src+0*ystride*/ \
"movq (%[src]),%%mm0\n\t" \
/*src+1*ystride*/ \
"movq (%[src],%[ystride]),%%mm1\n\t" \
/*ystride3=ystride*3*/ \
"lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
/*src+2*ystride*/ \
"movq (%[src],%[ystride],2),%%mm2\n\t" \
/*src+3*ystride*/ \
"movq (%[src],%[ystride3]),%%mm3\n\t" \
/*dst+0*ystride*/ \
"movq %%mm0,(%[dst])\n\t" \
/*dst+1*ystride*/ \
"movq %%mm1,(%[dst],%[ystride])\n\t" \
/*Pointer to next 4.*/ \
"lea (%[src],%[ystride],4),%[src]\n\t" \
/*dst+2*ystride*/ \
"movq %%mm2,(%[dst],%[ystride],2)\n\t" \
/*dst+3*ystride*/ \
"movq %%mm3,(%[dst],%[ystride3])\n\t" \
/*Pointer to next 4.*/ \
"lea (%[dst],%[ystride],4),%[dst]\n\t" \
/*src+0*ystride*/ \
"movq (%[src]),%%mm0\n\t" \
/*src+1*ystride*/ \
"movq (%[src],%[ystride]),%%mm1\n\t" \
/*src+2*ystride*/ \
"movq (%[src],%[ystride],2),%%mm2\n\t" \
/*src+3*ystride*/ \
"movq (%[src],%[ystride3]),%%mm3\n\t" \
/*dst+0*ystride*/ \
"movq %%mm0,(%[dst])\n\t" \
/*dst+1*ystride*/ \
"movq %%mm1,(%[dst],%[ystride])\n\t" \
/*dst+2*ystride*/ \
"movq %%mm2,(%[dst],%[ystride],2)\n\t" \
/*dst+3*ystride*/ \
"movq %%mm3,(%[dst],%[ystride3])\n\t" \
:[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \
:[ystride]"r"((ptrdiff_t)(_ystride)) \
:"memory" \
); \
} \
while(0)
# endif
#endif

View File

@ -0,0 +1,564 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
/*MMX acceleration of Theora's iDCT.
Originally written by Rudolf Marek, based on code from On2's VP3.*/
#include "x86int.h"
#include "../dct.h"
#if defined(OC_X86_ASM)
/*These are offsets into the table of constants below.*/
/*They are byte offsets: each row of the table is four 16-bit values, i.e. 8
bytes, so the row of 8's that follows the 7 cosine rows starts at byte 56.*/
/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
#define OC_COSINE_OFFSET (0)
/*A row of 8's.*/
#define OC_EIGHT_OFFSET (56)
/*A table of constants used by the MMX routines.*/
/*Each constant is repeated four times so that a single 8-byte load fills all
four 16-bit lanes of an MMX register with it.
The table is declared 8-byte aligned, and marked used so it is kept even
though it is only referenced through asm operands.*/
static const ogg_uint16_t __attribute__((aligned(8),used))
OC_IDCT_CONSTS[(7+1)*4]={
(ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
(ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
(ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
(ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
(ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
(ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
(ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
(ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
(ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
(ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
(ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
(ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
(ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
(ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
8, 8, 8, 8
};
/*Converts the expression in the argument to a string.*/
/*The expression is not evaluated by the preprocessor; its text is placed in
the asm template as written.*/
#define OC_M2STR(_s) #_s
/*38 cycles*/
/*The common first part of OC_ROW_IDCT and OC_COLUMN_IDCT.
OC_I(), OC_J() and OC_C() must be defined where this is expanded.
It reads its inputs from OC_I(0)...OC_I(3) and OC_J(4)...OC_J(7) and its
constants from OC_C(1)...OC_C(7).
It overwrites OC_I(1) and OC_I(2) with intermediate values (OC_I(1) is
reloaded into mm0 at the end) and leaves the remaining intermediate values
in mm1...mm7 for the macro that follows it.*/
#define OC_IDCT_BEGIN \
"#OC_IDCT_BEGIN\n\t" \
"movq "OC_I(3)",%%mm2\n\t" \
"movq "OC_C(3)",%%mm6\n\t" \
"movq %%mm2,%%mm4\n\t" \
"movq "OC_J(5)",%%mm7\n\t" \
"pmulhw %%mm6,%%mm4\n\t" \
"movq "OC_C(5)",%%mm1\n\t" \
"pmulhw %%mm7,%%mm6\n\t" \
"movq %%mm1,%%mm5\n\t" \
"pmulhw %%mm2,%%mm1\n\t" \
"movq "OC_I(1)",%%mm3\n\t" \
"pmulhw %%mm7,%%mm5\n\t" \
"movq "OC_C(1)",%%mm0\n\t" \
"paddw %%mm2,%%mm4\n\t" \
"paddw %%mm7,%%mm6\n\t" \
"paddw %%mm1,%%mm2\n\t" \
"movq "OC_J(7)",%%mm1\n\t" \
"paddw %%mm5,%%mm7\n\t" \
"movq %%mm0,%%mm5\n\t" \
"pmulhw %%mm3,%%mm0\n\t" \
"paddw %%mm7,%%mm4\n\t" \
"pmulhw %%mm1,%%mm5\n\t" \
"movq "OC_C(7)",%%mm7\n\t" \
"psubw %%mm2,%%mm6\n\t" \
"paddw %%mm3,%%mm0\n\t" \
"pmulhw %%mm7,%%mm3\n\t" \
"movq "OC_I(2)",%%mm2\n\t" \
"pmulhw %%mm1,%%mm7\n\t" \
"paddw %%mm1,%%mm5\n\t" \
"movq %%mm2,%%mm1\n\t" \
"pmulhw "OC_C(2)",%%mm2\n\t" \
"psubw %%mm5,%%mm3\n\t" \
"movq "OC_J(6)",%%mm5\n\t" \
"paddw %%mm7,%%mm0\n\t" \
"movq %%mm5,%%mm7\n\t" \
"psubw %%mm4,%%mm0\n\t" \
"pmulhw "OC_C(2)",%%mm5\n\t" \
"paddw %%mm1,%%mm2\n\t" \
"pmulhw "OC_C(6)",%%mm1\n\t" \
"paddw %%mm4,%%mm4\n\t" \
"paddw %%mm0,%%mm4\n\t" \
"psubw %%mm6,%%mm3\n\t" \
"paddw %%mm7,%%mm5\n\t" \
"paddw %%mm6,%%mm6\n\t" \
"pmulhw "OC_C(6)",%%mm7\n\t" \
"paddw %%mm3,%%mm6\n\t" \
"movq %%mm4,"OC_I(1)"\n\t" \
"psubw %%mm5,%%mm1\n\t" \
"movq "OC_C(4)",%%mm4\n\t" \
"movq %%mm3,%%mm5\n\t" \
"pmulhw %%mm4,%%mm3\n\t" \
"paddw %%mm2,%%mm7\n\t" \
"movq %%mm6,"OC_I(2)"\n\t" \
"movq %%mm0,%%mm2\n\t" \
"movq "OC_I(0)",%%mm6\n\t" \
"pmulhw %%mm4,%%mm0\n\t" \
"paddw %%mm3,%%mm5\n\t" \
"movq "OC_J(4)",%%mm3\n\t" \
"psubw %%mm1,%%mm5\n\t" \
"paddw %%mm0,%%mm2\n\t" \
"psubw %%mm3,%%mm6\n\t" \
"movq %%mm6,%%mm0\n\t" \
"pmulhw %%mm4,%%mm6\n\t" \
"paddw %%mm3,%%mm3\n\t" \
"paddw %%mm1,%%mm1\n\t" \
"paddw %%mm0,%%mm3\n\t" \
"paddw %%mm5,%%mm1\n\t" \
"pmulhw %%mm3,%%mm4\n\t" \
"paddw %%mm0,%%mm6\n\t" \
"psubw %%mm2,%%mm6\n\t" \
"paddw %%mm2,%%mm2\n\t" \
"movq "OC_I(1)",%%mm0\n\t" \
"paddw %%mm6,%%mm2\n\t" \
"paddw %%mm3,%%mm4\n\t" \
"psubw %%mm1,%%mm2\n\t" \
"#end OC_IDCT_BEGIN\n\t" \
/*38+8=46 cycles.*/
/*Row pass of the full iDCT: OC_IDCT_BEGIN followed by the final butterflies.
No rounding or shifting is applied here.
On exit R1 has been stored to OC_I(1) and the other seven results are left in
registers (mm0 and mm2...mm7), which is the layout OC_TRANSPOSE expects on
entry.*/
#define OC_ROW_IDCT \
"#OC_ROW_IDCT\n" \
OC_IDCT_BEGIN \
/*r3=D'*/ \
"movq "OC_I(2)",%%mm3\n\t" \
/*r4=E'=E-G*/ \
"psubw %%mm7,%%mm4\n\t" \
/*r1=H'+H'*/ \
"paddw %%mm1,%%mm1\n\t" \
/*r7=G+G*/ \
"paddw %%mm7,%%mm7\n\t" \
/*r1=R1=A''+H'*/ \
"paddw %%mm2,%%mm1\n\t" \
/*r7=G'=E+G*/ \
"paddw %%mm4,%%mm7\n\t" \
/*r4=R4=E'-D'*/ \
"psubw %%mm3,%%mm4\n\t" \
"paddw %%mm3,%%mm3\n\t" \
/*r6=R6=F'-B''*/ \
"psubw %%mm5,%%mm6\n\t" \
"paddw %%mm5,%%mm5\n\t" \
/*r3=R3=E'+D'*/ \
"paddw %%mm4,%%mm3\n\t" \
/*r5=R5=F'+B''*/ \
"paddw %%mm6,%%mm5\n\t" \
/*r7=R7=G'-C'*/ \
"psubw %%mm0,%%mm7\n\t" \
"paddw %%mm0,%%mm0\n\t" \
/*Save R1.*/ \
"movq %%mm1,"OC_I(1)"\n\t" \
/*r0=R0=G'+C'*/ \
"paddw %%mm7,%%mm0\n\t" \
"#end OC_ROW_IDCT\n\t" \
/*The following macro does two 4x4 transposes in place.
At entry, we assume:
r0 = a3 a2 a1 a0
I(1) = b3 b2 b1 b0
r2 = c3 c2 c1 c0
r3 = d3 d2 d1 d0
r4 = e3 e2 e1 e0
r5 = f3 f2 f1 f0
r6 = g3 g2 g1 g0
r7 = h3 h2 h1 h0
At exit, we have:
I(0) = d0 c0 b0 a0
I(1) = d1 c1 b1 a1
I(2) = d2 c2 b2 a2
I(3) = d3 c3 b3 a3
J(4) = h0 g0 f0 e0
J(5) = h1 g1 f1 e1
J(6) = h2 g2 f2 e2
J(7) = h3 g3 f3 e3
I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
Since r1 is free at entry, we calculate the Js first.
Here rN denotes register mmN, and I()/J() are the OC_I()/OC_J() memory
operands defined where the macro is expanded.
r0 is parked in I(0) near the start to free a register and is reloaded once
the Js have been stored.*/
/*19 cycles.*/
#define OC_TRANSPOSE \
"#OC_TRANSPOSE\n\t" \
"movq %%mm4,%%mm1\n\t" \
"punpcklwd %%mm5,%%mm4\n\t" \
"movq %%mm0,"OC_I(0)"\n\t" \
"punpckhwd %%mm5,%%mm1\n\t" \
"movq %%mm6,%%mm0\n\t" \
"punpcklwd %%mm7,%%mm6\n\t" \
"movq %%mm4,%%mm5\n\t" \
"punpckldq %%mm6,%%mm4\n\t" \
"punpckhdq %%mm6,%%mm5\n\t" \
"movq %%mm1,%%mm6\n\t" \
"movq %%mm4,"OC_J(4)"\n\t" \
"punpckhwd %%mm7,%%mm0\n\t" \
"movq %%mm5,"OC_J(5)"\n\t" \
"punpckhdq %%mm0,%%mm6\n\t" \
"movq "OC_I(0)",%%mm4\n\t" \
"punpckldq %%mm0,%%mm1\n\t" \
"movq "OC_I(1)",%%mm5\n\t" \
"movq %%mm4,%%mm0\n\t" \
"movq %%mm6,"OC_J(7)"\n\t" \
"punpcklwd %%mm5,%%mm0\n\t" \
"movq %%mm1,"OC_J(6)"\n\t" \
"punpckhwd %%mm5,%%mm4\n\t" \
"movq %%mm2,%%mm5\n\t" \
"punpcklwd %%mm3,%%mm2\n\t" \
"movq %%mm0,%%mm1\n\t" \
"punpckldq %%mm2,%%mm0\n\t" \
"punpckhdq %%mm2,%%mm1\n\t" \
"movq %%mm4,%%mm2\n\t" \
"movq %%mm0,"OC_I(0)"\n\t" \
"punpckhwd %%mm3,%%mm5\n\t" \
"movq %%mm1,"OC_I(1)"\n\t" \
"punpckhdq %%mm5,%%mm4\n\t" \
"punpckldq %%mm5,%%mm2\n\t" \
"movq %%mm4,"OC_I(3)"\n\t" \
"movq %%mm2,"OC_I(2)"\n\t" \
"#end OC_TRANSPOSE\n\t" \
/*38+19=57 cycles.*/
/*Column pass of the full iDCT: OC_IDCT_BEGIN followed by the final
butterflies, with rounding and scaling folded in.
Each result has the constant row OC_8 (four 8's) added to it and is then
shifted right arithmetically by 4 bits; the NRn names in the comments denote
these rounded, shifted results.
All eight results are stored back to memory: NR0...NR3 to OC_I(0)...OC_I(3)
and NR4...NR7 to OC_J(4)...OC_J(7).*/
#define OC_COLUMN_IDCT \
"#OC_COLUMN_IDCT\n" \
OC_IDCT_BEGIN \
"paddw "OC_8",%%mm2\n\t" \
/*r1=H'+H'*/ \
"paddw %%mm1,%%mm1\n\t" \
/*r1=R1=A''+H'*/ \
"paddw %%mm2,%%mm1\n\t" \
/*r2=NR2*/ \
"psraw $4,%%mm2\n\t" \
/*r4=E'=E-G*/ \
"psubw %%mm7,%%mm4\n\t" \
/*r1=NR1*/ \
"psraw $4,%%mm1\n\t" \
/*r3=D'*/ \
"movq "OC_I(2)",%%mm3\n\t" \
/*r7=G+G*/ \
"paddw %%mm7,%%mm7\n\t" \
/*Store NR2 at I(2).*/ \
"movq %%mm2,"OC_I(2)"\n\t" \
/*r7=G'=E+G*/ \
"paddw %%mm4,%%mm7\n\t" \
/*Store NR1 at I(1).*/ \
"movq %%mm1,"OC_I(1)"\n\t" \
/*r4=R4=E'-D'*/ \
"psubw %%mm3,%%mm4\n\t" \
"paddw "OC_8",%%mm4\n\t" \
/*r3=D'+D'*/ \
"paddw %%mm3,%%mm3\n\t" \
/*r3=R3=E'+D'*/ \
"paddw %%mm4,%%mm3\n\t" \
/*r4=NR4*/ \
"psraw $4,%%mm4\n\t" \
/*r6=R6=F'-B''*/ \
"psubw %%mm5,%%mm6\n\t" \
/*r3=NR3*/ \
"psraw $4,%%mm3\n\t" \
"paddw "OC_8",%%mm6\n\t" \
/*r5=B''+B''*/ \
"paddw %%mm5,%%mm5\n\t" \
/*r5=R5=F'+B''*/ \
"paddw %%mm6,%%mm5\n\t" \
/*r6=NR6*/ \
"psraw $4,%%mm6\n\t" \
/*Store NR4 at J(4).*/ \
"movq %%mm4,"OC_J(4)"\n\t" \
/*r5=NR5*/ \
"psraw $4,%%mm5\n\t" \
/*Store NR3 at I(3).*/ \
"movq %%mm3,"OC_I(3)"\n\t" \
/*r7=R7=G'-C'*/ \
"psubw %%mm0,%%mm7\n\t" \
"paddw "OC_8",%%mm7\n\t" \
/*r0=C'+C'*/ \
"paddw %%mm0,%%mm0\n\t" \
/*r0=R0=G'+C'*/ \
"paddw %%mm7,%%mm0\n\t" \
/*r7=NR7*/ \
"psraw $4,%%mm7\n\t" \
/*Store NR6 at J(6).*/ \
"movq %%mm6,"OC_J(6)"\n\t" \
/*r0=NR0*/ \
"psraw $4,%%mm0\n\t" \
/*Store NR5 at J(5).*/ \
"movq %%mm5,"OC_J(5)"\n\t" \
/*Store NR7 at J(7).*/ \
"movq %%mm7,"OC_J(7)"\n\t" \
/*Store NR0 at I(0).*/ \
"movq %%mm0,"OC_I(0)"\n\t" \
"#end OC_COLUMN_IDCT\n\t" \
/*Builds the asm memory operand for row _i of the constant table, starting
from byte offset _m: the stringified text "_m+(_i)*8" followed by "(%[c])".
The offset arithmetic is passed through as text (see OC_M2STR), so it is the
assembler, not the C preprocessor, that reduces it to a number.*/
#define OC_MID(_m,_i) OC_M2STR(_m+(_i)*8)"(%[c])"
/*The operand for the _i'th cosine constant; _i is 1-based.*/
#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1)
/*The operand for the row of 8's.*/
#define OC_8 OC_MID(OC_EIGHT_OFFSET,0)
/*Performs the full 8x8 inverse DCT in place on _y.
OC_ROW_IDCT and OC_TRANSPOSE are run twice, once with OC_I/OC_J addressing
byte offsets 0 and 8 of _y and once with them addressing byte offsets 64 and
72, and then OC_COLUMN_IDCT is run twice, at byte offsets 0 and 8.
_y: The 64 coefficients on input, the 64 output values on return.*/
static void oc_idct8x8_slow(ogg_int16_t _y[64]){
/*This routine accepts an 8x8 matrix, but in partially transposed form.
Every 4x4 block is transposed.*/
__asm__ __volatile__(
#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])"
OC_ROW_IDCT
OC_TRANSPOSE
#undef OC_I
#undef OC_J
#define OC_I(_k) OC_M2STR((_k*16)+64)"(%[y])"
#define OC_J(_k) OC_M2STR(((_k-4)*16)+72)"(%[y])"
OC_ROW_IDCT
OC_TRANSPOSE
#undef OC_I
#undef OC_J
#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
#define OC_J(_k) OC_I(_k)
OC_COLUMN_IDCT
#undef OC_I
#undef OC_J
#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])"
#define OC_J(_k) OC_I(_k)
OC_COLUMN_IDCT
#undef OC_I
#undef OC_J
:
:[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS)
/*The asm reads and writes _y[] through the pointer operand, which the compiler
cannot see; the "memory" clobber keeps it from caching or reordering
accesses to that array across this statement.*/
:"memory"
);
}
/*25 cycles.*/
/*Reduced counterpart of OC_IDCT_BEGIN, shared by OC_ROW_IDCT_10 and
OC_COLUMN_IDCT_10.
It reads inputs only from OC_I(0)...OC_I(3); no OC_J() operand is read, so
inputs 4...7 are treated as zero.
As in OC_IDCT_BEGIN, OC_I(1) and OC_I(2) are overwritten with intermediate
values and the rest of the state is left in registers.
NOTE(review): the purpose of the two nop instructions is not evident from the
code; presumably they are for instruction scheduling or alignment.*/
#define OC_IDCT_BEGIN_10 \
"#OC_IDCT_BEGIN_10\n\t" \
"movq "OC_I(3)",%%mm2\n\t" \
"nop\n\t" \
"movq "OC_C(3)",%%mm6\n\t" \
"movq %%mm2,%%mm4\n\t" \
"movq "OC_C(5)",%%mm1\n\t" \
"pmulhw %%mm6,%%mm4\n\t" \
"movq "OC_I(1)",%%mm3\n\t" \
"pmulhw %%mm2,%%mm1\n\t" \
"movq "OC_C(1)",%%mm0\n\t" \
"paddw %%mm2,%%mm4\n\t" \
"pxor %%mm6,%%mm6\n\t" \
"paddw %%mm1,%%mm2\n\t" \
"movq "OC_I(2)",%%mm5\n\t" \
"pmulhw %%mm3,%%mm0\n\t" \
"movq %%mm5,%%mm1\n\t" \
"paddw %%mm3,%%mm0\n\t" \
"pmulhw "OC_C(7)",%%mm3\n\t" \
"psubw %%mm2,%%mm6\n\t" \
"pmulhw "OC_C(2)",%%mm5\n\t" \
"psubw %%mm4,%%mm0\n\t" \
"movq "OC_I(2)",%%mm7\n\t" \
"paddw %%mm4,%%mm4\n\t" \
"paddw %%mm5,%%mm7\n\t" \
"paddw %%mm0,%%mm4\n\t" \
"pmulhw "OC_C(6)",%%mm1\n\t" \
"psubw %%mm6,%%mm3\n\t" \
"movq %%mm4,"OC_I(1)"\n\t" \
"paddw %%mm6,%%mm6\n\t" \
"movq "OC_C(4)",%%mm4\n\t" \
"paddw %%mm3,%%mm6\n\t" \
"movq %%mm3,%%mm5\n\t" \
"pmulhw %%mm4,%%mm3\n\t" \
"movq %%mm6,"OC_I(2)"\n\t" \
"movq %%mm0,%%mm2\n\t" \
"movq "OC_I(0)",%%mm6\n\t" \
"pmulhw %%mm4,%%mm0\n\t" \
"paddw %%mm3,%%mm5\n\t" \
"paddw %%mm0,%%mm2\n\t" \
"psubw %%mm1,%%mm5\n\t" \
"pmulhw %%mm4,%%mm6\n\t" \
"paddw "OC_I(0)",%%mm6\n\t" \
"paddw %%mm1,%%mm1\n\t" \
"movq %%mm6,%%mm4\n\t" \
"paddw %%mm5,%%mm1\n\t" \
"psubw %%mm2,%%mm6\n\t" \
"paddw %%mm2,%%mm2\n\t" \
"movq "OC_I(1)",%%mm0\n\t" \
"paddw %%mm6,%%mm2\n\t" \
"psubw %%mm1,%%mm2\n\t" \
"nop\n\t" \
"#end OC_IDCT_BEGIN_10\n\t" \
/*25+8=33 cycles.*/
/*Row pass of the reduced iDCT: OC_IDCT_BEGIN_10 followed by the same final
butterflies as OC_ROW_IDCT.
No rounding or shifting is applied here.
On exit R1 has been stored to OC_I(1) and the other seven results are left in
registers (mm0 and mm2...mm7), ready for OC_TRANSPOSE.*/
#define OC_ROW_IDCT_10 \
"#OC_ROW_IDCT_10\n\t" \
OC_IDCT_BEGIN_10 \
/*r3=D'*/ \
"movq "OC_I(2)",%%mm3\n\t" \
/*r4=E'=E-G*/ \
"psubw %%mm7,%%mm4\n\t" \
/*r1=H'+H'*/ \
"paddw %%mm1,%%mm1\n\t" \
/*r7=G+G*/ \
"paddw %%mm7,%%mm7\n\t" \
/*r1=R1=A''+H'*/ \
"paddw %%mm2,%%mm1\n\t" \
/*r7=G'=E+G*/ \
"paddw %%mm4,%%mm7\n\t" \
/*r4=R4=E'-D'*/ \
"psubw %%mm3,%%mm4\n\t" \
"paddw %%mm3,%%mm3\n\t" \
/*r6=R6=F'-B''*/ \
"psubw %%mm5,%%mm6\n\t" \
"paddw %%mm5,%%mm5\n\t" \
/*r3=R3=E'+D'*/ \
"paddw %%mm4,%%mm3\n\t" \
/*r5=R5=F'+B''*/ \
"paddw %%mm6,%%mm5\n\t" \
/*r7=R7=G'-C'*/ \
"psubw %%mm0,%%mm7\n\t" \
"paddw %%mm0,%%mm0\n\t" \
/*Save R1.*/ \
"movq %%mm1,"OC_I(1)"\n\t" \
/*r0=R0=G'+C'*/ \
"paddw %%mm7,%%mm0\n\t" \
"#end OC_ROW_IDCT_10\n\t" \
/*25+19=44 cycles.*/
/*Column pass of the reduced iDCT: OC_IDCT_BEGIN_10 followed by the same final
butterflies, rounding and scaling as OC_COLUMN_IDCT.
Each result has the constant row OC_8 (four 8's) added to it and is then
shifted right arithmetically by 4 bits.
All eight results are stored back to memory: NR0...NR3 to OC_I(0)...OC_I(3)
and NR4...NR7 to OC_J(4)...OC_J(7).*/
#define OC_COLUMN_IDCT_10 \
"#OC_COLUMN_IDCT_10\n\t" \
OC_IDCT_BEGIN_10 \
"paddw "OC_8",%%mm2\n\t" \
/*r1=H'+H'*/ \
"paddw %%mm1,%%mm1\n\t" \
/*r1=R1=A''+H'*/ \
"paddw %%mm2,%%mm1\n\t" \
/*r2=NR2*/ \
"psraw $4,%%mm2\n\t" \
/*r4=E'=E-G*/ \
"psubw %%mm7,%%mm4\n\t" \
/*r1=NR1*/ \
"psraw $4,%%mm1\n\t" \
/*r3=D'*/ \
"movq "OC_I(2)",%%mm3\n\t" \
/*r7=G+G*/ \
"paddw %%mm7,%%mm7\n\t" \
/*Store NR2 at I(2).*/ \
"movq %%mm2,"OC_I(2)"\n\t" \
/*r7=G'=E+G*/ \
"paddw %%mm4,%%mm7\n\t" \
/*Store NR1 at I(1).*/ \
"movq %%mm1,"OC_I(1)"\n\t" \
/*r4=R4=E'-D'*/ \
"psubw %%mm3,%%mm4\n\t" \
"paddw "OC_8",%%mm4\n\t" \
/*r3=D'+D'*/ \
"paddw %%mm3,%%mm3\n\t" \
/*r3=R3=E'+D'*/ \
"paddw %%mm4,%%mm3\n\t" \
/*r4=NR4*/ \
"psraw $4,%%mm4\n\t" \
/*r6=R6=F'-B''*/ \
"psubw %%mm5,%%mm6\n\t" \
/*r3=NR3*/ \
"psraw $4,%%mm3\n\t" \
"paddw "OC_8",%%mm6\n\t" \
/*r5=B''+B''*/ \
"paddw %%mm5,%%mm5\n\t" \
/*r5=R5=F'+B''*/ \
"paddw %%mm6,%%mm5\n\t" \
/*r6=NR6*/ \
"psraw $4,%%mm6\n\t" \
/*Store NR4 at J(4).*/ \
"movq %%mm4,"OC_J(4)"\n\t" \
/*r5=NR5*/ \
"psraw $4,%%mm5\n\t" \
/*Store NR3 at I(3).*/ \
"movq %%mm3,"OC_I(3)"\n\t" \
/*r7=R7=G'-C'*/ \
"psubw %%mm0,%%mm7\n\t" \
"paddw "OC_8",%%mm7\n\t" \
/*r0=C'+C'*/ \
"paddw %%mm0,%%mm0\n\t" \
/*r0=R0=G'+C'*/ \
"paddw %%mm7,%%mm0\n\t" \
/*r7=NR7*/ \
"psraw $4,%%mm7\n\t" \
/*Store NR6 at J(6).*/ \
"movq %%mm6,"OC_J(6)"\n\t" \
/*r0=NR0*/ \
"psraw $4,%%mm0\n\t" \
/*Store NR5 at J(5).*/ \
"movq %%mm5,"OC_J(5)"\n\t" \
/*Store NR7 at J(7).*/ \
"movq %%mm7,"OC_J(7)"\n\t" \
/*Store NR0 at I(0).*/ \
"movq %%mm0,"OC_I(0)"\n\t" \
"#end OC_COLUMN_IDCT_10\n\t" \
/*Performs the reduced 8x8 inverse DCT in place on _y.
A single OC_ROW_IDCT_10 and OC_TRANSPOSE are run with OC_I/OC_J addressing
byte offsets 0 and 8 of _y, and then OC_COLUMN_IDCT_10 is run twice, at byte
offsets 0 and 8.
oc_idct8x8_mmx() selects this routine when _last_zzi is less than 10.
_y: The 64 coefficients on input, the 64 output values on return.*/
static void oc_idct8x8_10(ogg_int16_t _y[64]){
__asm__ __volatile__(
#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])"
/*Done with dequant, descramble, and partial transpose.
Now do the iDCT itself.*/
OC_ROW_IDCT_10
OC_TRANSPOSE
#undef OC_I
#undef OC_J
#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
#define OC_J(_k) OC_I(_k)
OC_COLUMN_IDCT_10
#undef OC_I
#undef OC_J
#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])"
#define OC_J(_k) OC_I(_k)
OC_COLUMN_IDCT_10
#undef OC_I
#undef OC_J
:
:[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS)
/*The asm reads and writes _y[] through the pointer operand, which the compiler
cannot see; the "memory" clobber keeps it from caching or reordering
accesses to that array across this statement.*/
:"memory"
);
}
/*Performs an inverse 8x8 Type-II DCT transform.
The input is assumed to be scaled by a factor of 4 relative to orthonormal
version of the transform.
_y: The coefficients on input; the transform is done in place.
_last_zzi: The zig-zag index reached before the block's final token was
decoded (see below); it selects which implementation is used.*/
void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){
  /*A note on _last_zzi: it is not quite the number of coefficients decoded
     for this block.
    It holds the value of zzi from BEFORE the block's final token was decoded.
    Usually that token is an EOB (continuing an EOB run from an earlier block
     counts too), and then the two quantities agree.
    They differ when the final token is not an EOB but fills the block out to
     exactly 64 coefficients, in which case _last_zzi is below 64.
    If that token was anything but a pure zero run, _last_zzi is at least 46,
     which makes no difference to the choice made here.
    If it WAS a pure zero run of length 63, _last_zzi is 1 even though 64
     coefficients were decoded, so the reduced transform is chosen where a
     true coefficient count would not have chosen it.
    A zero run of length 64 gives a _last_zzi of 0; the DC coefficient is
     still processed, since DC prediction may have made it non-zero.
    Convoluted as it is, this is arguably the right behavior: a block that
     ends in a long zero run rather than an ordinary EOB still gets the
     smaller transform.
    It could be smarter: several separate zero runs at the end of a block
     will defeat it, but an encoder that produces those deserves what it
     gets.
    This approach was inherited from VP3.*/
  if(_last_zzi>=10){
    /*General case: run the full transform.*/
    oc_idct8x8_slow(_y);
    return;
  }
  /*Fewer than 10: the reduced transform is sufficient.*/
  oc_idct8x8_10(_y);
}
#endif

View File

@ -0,0 +1,215 @@
#if !defined(_x86_mmxloop_H)
# define _x86_mmxloop_H (1)
# include <stddef.h>
# include "x86int.h"
#if defined(OC_X86_ASM)
/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.
mm4, mm5, mm6 and mm7 are also used as scratch and are overwritten.
The asm statement this is expanded in must provide an operand named [ll];
8 bytes are loaded from (%[ll]) and used as the per-byte value of L.*/
#define OC_LOOP_FILTER8_MMX \
"#OC_LOOP_FILTER8_MMX\n\t" \
/*mm7=0*/ \
"pxor %%mm7,%%mm7\n\t" \
/*mm6:mm0={a0,...,a7}*/ \
"movq %%mm0,%%mm6\n\t" \
"punpcklbw %%mm7,%%mm0\n\t" \
"punpckhbw %%mm7,%%mm6\n\t" \
/*mm5:mm3={d0,...,d7}*/ \
"movq %%mm3,%%mm5\n\t" \
"punpcklbw %%mm7,%%mm3\n\t" \
"punpckhbw %%mm7,%%mm5\n\t" \
/*mm6:mm0={a0-d0,...,a7-d7}*/ \
"psubw %%mm3,%%mm0\n\t" \
"psubw %%mm5,%%mm6\n\t" \
/*mm3:mm1={b0,...,b7}*/ \
"movq %%mm1,%%mm3\n\t" \
"punpcklbw %%mm7,%%mm1\n\t" \
"movq %%mm2,%%mm4\n\t" \
"punpckhbw %%mm7,%%mm3\n\t" \
/*mm5:mm4={c0,...,c7}*/ \
"movq %%mm2,%%mm5\n\t" \
"punpcklbw %%mm7,%%mm4\n\t" \
"punpckhbw %%mm7,%%mm5\n\t" \
/*mm7={3}x4 \
mm5:mm4={c0-b0,...,c7-b7}*/ \
"pcmpeqw %%mm7,%%mm7\n\t" \
"psubw %%mm1,%%mm4\n\t" \
"psrlw $14,%%mm7\n\t" \
"psubw %%mm3,%%mm5\n\t" \
/*Scale by 3.*/ \
"pmullw %%mm7,%%mm4\n\t" \
"pmullw %%mm7,%%mm5\n\t" \
/*mm7={4}x4 \
mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \
"psrlw $1,%%mm7\n\t" \
"paddw %%mm0,%%mm4\n\t" \
"psllw $2,%%mm7\n\t" \
"movq (%[ll]),%%mm0\n\t" \
"paddw %%mm6,%%mm5\n\t" \
/*R_i has the range [-127,128], so we compute -R_i instead. \
mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \
"psubw %%mm7,%%mm4\n\t" \
"psubw %%mm7,%%mm5\n\t" \
"psraw $3,%%mm4\n\t" \
"psraw $3,%%mm5\n\t" \
"pcmpeqb %%mm7,%%mm7\n\t" \
"packsswb %%mm5,%%mm4\n\t" \
"pxor %%mm6,%%mm6\n\t" \
"pxor %%mm7,%%mm4\n\t" \
"packuswb %%mm3,%%mm1\n\t" \
/*Now compute lflim of -mm4 cf. Section 7.10 of the spec.*/ \
/*There's no unsigned byte+signed byte with unsigned saturation op code, so \
we have to split things by sign (the other option is to work in 16 bits, \
but working in 8 bits gives much better parallelism). \
We compute abs(R_i), but save a mask of which terms were negative in mm6. \
Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \
Finally, we split mm4 into positive and negative pieces using the mask in \
mm6, and add and subtract them as appropriate.*/ \
/*mm4=abs(-R_i)*/ \
/*mm7=255-2*L*/ \
"pcmpgtb %%mm4,%%mm6\n\t" \
"psubb %%mm0,%%mm7\n\t" \
"pxor %%mm6,%%mm4\n\t" \
"psubb %%mm0,%%mm7\n\t" \
"psubb %%mm6,%%mm4\n\t" \
/*mm7=255-max(2*L-abs(R_i),0)*/ \
"paddusb %%mm4,%%mm7\n\t" \
/*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \
"paddusb %%mm7,%%mm4\n\t" \
"psubusb %%mm7,%%mm4\n\t" \
/*Now split mm4 by the original sign of -R_i.*/ \
"movq %%mm4,%%mm5\n\t" \
"pand %%mm6,%%mm4\n\t" \
"pandn %%mm5,%%mm6\n\t" \
/*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
/*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
"paddusb %%mm4,%%mm1\n\t" \
"psubusb %%mm4,%%mm2\n\t" \
"psubusb %%mm6,%%mm1\n\t" \
"paddusb %%mm6,%%mm2\n\t" \
#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \
/*Filters one horizontal block edge that is 8 pixels wide. \
  Four rows are loaded, starting two rows above _pix: rows a and b lie above \
   the edge, rows c and d lie below it. \
  OC_LOOP_FILTER8_MMX then adjusts b and c (in mm1 and mm2), and only those \
   two rows are written back. \
  _pix:     Pointer to the first pixel of row c (the row just below the edge). \
  _ystride: The distance in bytes between two consecutive rows. \
  _ll:      Pointer to the 8 limit bytes that OC_LOOP_FILTER8_MMX loads with \
             movq. \
  _pix and _ystride are parenthesized in the operand list so that compound \
   argument expressions expand with the intended precedence.*/ \
  do{ \
    ptrdiff_t ystride3__; \
    __asm__ __volatile__( \
      /*mm0={a0,...,a7}*/ \
      "movq (%[pix]),%%mm0\n\t" \
      /*ystride3=_ystride*3*/ \
      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
      /*mm3={d0,...,d7}*/ \
      "movq (%[pix],%[ystride3]),%%mm3\n\t" \
      /*mm1={b0,...,b7}*/ \
      "movq (%[pix],%[ystride]),%%mm1\n\t" \
      /*mm2={c0,...,c7}*/ \
      "movq (%[pix],%[ystride],2),%%mm2\n\t" \
      OC_LOOP_FILTER8_MMX \
      /*Write it back out.*/ \
      "movq %%mm1,(%[pix],%[ystride])\n\t" \
      "movq %%mm2,(%[pix],%[ystride],2)\n\t" \
      :[ystride3]"=&r"(ystride3__) \
      /*[pix] is the address of row a, two rows above _pix.*/ \
      :[pix]"r"((_pix)-(_ystride)*2),[ystride]"r"((ptrdiff_t)(_ystride)), \
       [ll]"r"(_ll) \
      :"memory" \
    ); \
  } \
  while(0)
#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \
do{ \
unsigned char *pix__; \
ptrdiff_t ystride3__; \
ptrdiff_t d__; \
pix__=(_pix)-2; \
__asm__ __volatile__( \
/*x x x x d0 c0 b0 a0*/ \
"movd (%[pix]),%%mm0\n\t" \
/*x x x x d1 c1 b1 a1*/ \
"movd (%[pix],%[ystride]),%%mm1\n\t" \
/*ystride3=_ystride*3*/ \
"lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
/*x x x x d2 c2 b2 a2*/ \
"movd (%[pix],%[ystride],2),%%mm2\n\t" \
/*x x x x d3 c3 b3 a3*/ \
"lea (%[pix],%[ystride],4),%[d]\n\t" \
"movd (%[pix],%[ystride3]),%%mm3\n\t" \
/*x x x x d4 c4 b4 a4*/ \
"movd (%[d]),%%mm4\n\t" \
/*x x x x d5 c5 b5 a5*/ \
"movd (%[d],%[ystride]),%%mm5\n\t" \
/*x x x x d6 c6 b6 a6*/ \
"movd (%[d],%[ystride],2),%%mm6\n\t" \
/*x x x x d7 c7 b7 a7*/ \
"movd (%[d],%[ystride3]),%%mm7\n\t" \
/*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \
"punpcklbw %%mm1,%%mm0\n\t" \
/*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \
"punpcklbw %%mm3,%%mm2\n\t" \
/*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \
"movq %%mm0,%%mm3\n\t" \
/*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \
"punpcklwd %%mm2,%%mm0\n\t" \
/*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \
"punpckhwd %%mm2,%%mm3\n\t" \
/*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \
"movq %%mm0,%%mm1\n\t" \
/*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \
"punpcklbw %%mm5,%%mm4\n\t" \
/*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \
"punpcklbw %%mm7,%%mm6\n\t" \
/*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \
"movq %%mm4,%%mm5\n\t" \
/*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \
"punpcklwd %%mm6,%%mm4\n\t" \
/*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \
"punpckhwd %%mm6,%%mm5\n\t" \
/*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \
"movq %%mm3,%%mm2\n\t" \
/*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \
"punpckldq %%mm4,%%mm0\n\t" \
/*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \
"punpckhdq %%mm4,%%mm1\n\t" \
/*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \
"punpckldq %%mm5,%%mm2\n\t" \
/*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \
"punpckhdq %%mm5,%%mm3\n\t" \
OC_LOOP_FILTER8_MMX \
/*mm2={b0+R_0'',...,b7+R_7''}*/ \
"movq %%mm1,%%mm0\n\t" \
/*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \
"punpcklbw %%mm2,%%mm1\n\t" \
/*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \
"punpckhbw %%mm2,%%mm0\n\t" \
/*[d]=c1 b1 c0 b0*/ \
"movd %%mm1,%[d]\n\t" \
"movw %w[d],1(%[pix])\n\t" \
"psrlq $32,%%mm1\n\t" \
"shr $16,%[d]\n\t" \
"movw %w[d],1(%[pix],%[ystride])\n\t" \
/*[d]=c3 b3 c2 b2*/ \
"movd %%mm1,%[d]\n\t" \
"movw %w[d],1(%[pix],%[ystride],2)\n\t" \
"shr $16,%[d]\n\t" \
"movw %w[d],1(%[pix],%[ystride3])\n\t" \
"lea (%[pix],%[ystride],4),%[pix]\n\t" \
/*[d]=c5 b5 c4 b4*/ \
"movd %%mm0,%[d]\n\t" \
"movw %w[d],1(%[pix])\n\t" \
"psrlq $32,%%mm0\n\t" \
"shr $16,%[d]\n\t" \
"movw %w[d],1(%[pix],%[ystride])\n\t" \
/*[d]=c7 b7 c6 b6*/ \
"movd %%mm0,%[d]\n\t" \
"movw %w[d],1(%[pix],%[ystride],2)\n\t" \
"shr $16,%[d]\n\t" \
"movw %w[d],1(%[pix],%[ystride3])\n\t" \
:[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \
:[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \
:"memory" \
); \
} \
while(0)
# endif
#endif

View File

@ -0,0 +1,188 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxstate.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
/*MMX acceleration of complete fragment reconstruction algorithm.
Originally written by Rudolf Marek.*/
#include <string.h>
#include "x86int.h"
#include "mmxfrag.h"
#include "mmxloop.h"
#if defined(OC_X86_ASM)
/*Reconstructs a single fragment using the MMX routines.
  The coefficients are first prepared in _dct_coeffs (DC-only fill or full
   iDCT), and the result is then written into the OC_FRAME_SELF reference
   frame, either on its own (intra) or added to one or two predictors taken
   from the reference frame selected by the fragment's coding mode.
  _state:      The state holding the fragment tables and frame buffers.
  _fragi:      The index of the fragment (into frags, frag_buf_offs and
                frag_mvs).
  _pli:        The color plane the fragment lies in.
  _dct_coeffs: The coefficients of the fragment; overwritten by this call.
  _last_zzi:   Passed on to the iDCT; values below 2 select the DC-only path.
  _dc_quant:   The multiplier used to dequantize _dct_coeffs[0].*/
void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
  const unsigned char *pred;
  unsigned char       *out;
  ptrdiff_t            buf_off;
  int                  mv_offs[2];
  int                  stride;
  int                  mode;
  int                  npreds;
  /*Step 1: the inverse transform.*/
  if(_last_zzi>=2){
    /*General case: dequantize the DC coefficient and run the full iDCT.*/
    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
    oc_idct8x8_mmx(_dct_coeffs,_last_zzi);
  }
  else{
    /*DC-only shortcut.
      The value is kept in an unsigned type so that the __asm__ block does not
       sign-extend it when it is placed in a register.*/
    ogg_uint16_t dc;
    /*This dequantization product is the only one that is rounded, since no
       iDCT (and thus no iDCT rounding) follows.*/
    dc=(ogg_int16_t)((_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15)>>5);
    /*Broadcast the value to all 64 entries of _dct_coeffs.*/
    __asm__ __volatile__(
      /*mm0=0000 0000 0000 AAAA*/
      "movd %[p],%%mm0\n\t"
      /*mm0=0000 0000 AAAA AAAA*/
      "punpcklwd %%mm0,%%mm0\n\t"
      /*mm0=AAAA AAAA AAAA AAAA*/
      "punpckldq %%mm0,%%mm0\n\t"
      /*16 stores of 4 coefficients each.*/
      "movq %%mm0,(%[y])\n\t"
      "movq %%mm0,8(%[y])\n\t"
      "movq %%mm0,16(%[y])\n\t"
      "movq %%mm0,24(%[y])\n\t"
      "movq %%mm0,32(%[y])\n\t"
      "movq %%mm0,40(%[y])\n\t"
      "movq %%mm0,48(%[y])\n\t"
      "movq %%mm0,56(%[y])\n\t"
      "movq %%mm0,64(%[y])\n\t"
      "movq %%mm0,72(%[y])\n\t"
      "movq %%mm0,80(%[y])\n\t"
      "movq %%mm0,88(%[y])\n\t"
      "movq %%mm0,96(%[y])\n\t"
      "movq %%mm0,104(%[y])\n\t"
      "movq %%mm0,112(%[y])\n\t"
      "movq %%mm0,120(%[y])\n\t"
      :
      :[y]"r"(_dct_coeffs),[p]"r"((unsigned)dc)
      :"memory"
    );
  }
  /*Step 2: write the result into the frame currently being reconstructed.*/
  buf_off=_state->frag_buf_offs[_fragi];
  mode=_state->frags[_fragi].mb_mode;
  stride=_state->ref_ystride[_pli];
  out=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+buf_off;
  if(mode==OC_MODE_INTRA){
    oc_frag_recon_intra_mmx(out,stride,_dct_coeffs);
    return;
  }
  /*Inter modes: locate the predictor in the frame this mode refers to.*/
  pred=
   _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mode)]]
   +buf_off;
  npreds=oc_state_get_mv_offsets(_state,mv_offs,_pli,
   _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1]);
  if(npreds>1){
    /*Two offsets were returned: use the two-predictor routine.*/
    oc_frag_recon_inter2_mmx(out,pred+mv_offs[0],pred+mv_offs[1],stride,
     _dct_coeffs);
  }
  else{
    oc_frag_recon_inter_mmx(out,pred+mv_offs[0],stride,_dct_coeffs);
  }
}
/*We copy this entire function to inline the actual MMX routines so that we
use only a single indirect call.*/
/*Copies a list of fragments from one reference frame to another.
  Each fragment is copied to the same buffer offset it was read from.
  _state:     The state holding the frame buffers and fragment offsets.
  _fragis:    A pointer to a list of fragment indices.
  _nfragis:   The number of fragment indices in the list.
  _dst_frame: The reference frame to copy to.
  _src_frame: The reference frame to copy from.
  _pli:       The color plane the fragments lie in.*/
void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
 int _dst_frame,int _src_frame,int _pli){
  const unsigned char *src_frame_data;
  unsigned char       *dst_frame_data;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *next_fragi;
  ptrdiff_t            nleft;
  int                  ystride;
  frag_buf_offs=_state->frag_buf_offs;
  ystride=_state->ref_ystride[_pli];
  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
  /*Walk the index list front to back, one fragment per iteration.*/
  next_fragi=_fragis;
  for(nleft=_nfragis;nleft>0;nleft--){
    ptrdiff_t frag_buf_off;
    frag_buf_off=frag_buf_offs[*next_fragi++];
    OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off,
     src_frame_data+frag_buf_off,ystride);
  }
}
/*Runs the loop filter over a range of fragment rows in one plane.
  The bottom edge of a row may be filtered as well, which modifies pixels in
   the following fragment row, so that row must also be available.
  _state:     The state holding the fragment tables and frame buffers.
  _bv:        The bounding values array (not referenced by this version).
  _refi:      The index of the frame buffer to filter.
  _pli:       The color plane to filter.
  _fragy0:    The Y coordinate of the first fragment row to filter.
  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
 int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
  OC_ALIGN8(unsigned char ll[8]);
  const oc_fragment_plane *fplane;
  const oc_fragment       *frags;
  const ptrdiff_t         *frag_buf_offs;
  unsigned char           *ref_frame_data;
  ptrdiff_t                plane_start;
  ptrdiff_t                plane_end;
  ptrdiff_t                row_start;
  ptrdiff_t                rows_end;
  int                      ystride;
  int                      nhfrags;
  /*Replicate the filter limit for the current qi into all 8 bytes of ll.*/
  memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll));
  frags=_state->frags;
  frag_buf_offs=_state->frag_buf_offs;
  ref_frame_data=_state->ref_frame_data[_refi];
  ystride=_state->ref_ystride[_pli];
  fplane=_state->fplanes+_pli;
  nhfrags=fplane->nhfrags;
  plane_start=fplane->froffset;
  plane_end=plane_start+fplane->nfrags;
  row_start=plane_start+_fragy0*(ptrdiff_t)nhfrags;
  rows_end=row_start+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
  /*The traversal below looks odd on purpose.
    An edge is filtered whenever at least one of the two fragments that share
     it is coded.
    The order in which the edges are filtered affects the result, and the
     order used here is the one VP3 chose.*/
  for(;row_start<rows_end;row_start+=nhfrags){
    ptrdiff_t row_end;
    ptrdiff_t fragi;
    row_end=row_start+nhfrags;
    for(fragi=row_start;fragi<row_end;fragi++){
      unsigned char *ref;
      if(!frags[fragi].coded)continue;
      ref=ref_frame_data+frag_buf_offs[fragi];
      /*Left edge, unless this is the first fragment in the row.*/
      if(fragi>row_start){
        OC_LOOP_FILTER_H_MMX(ref,ystride,ll);
      }
      /*Top edge, unless this is the first row of the plane.*/
      if(row_start>plane_start){
        OC_LOOP_FILTER_V_MMX(ref,ystride,ll);
      }
      /*Right edge, but only when the right neighbor is not coded.*/
      if(fragi+1<row_end&&!frags[fragi+1].coded){
        OC_LOOP_FILTER_H_MMX(ref+8,ystride,ll);
      }
      /*Bottom edge, but only when the fragment below is not coded.*/
      if(fragi+nhfrags<plane_end&&!frags[fragi+nhfrags].coded){
        OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,ll);
      }
    }
  }
}
#endif

View File

@ -0,0 +1,523 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************/
/*SSE2 fDCT implementation for x86_64.*/
/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/
#include <stddef.h>
#include "x86enc.h"
#if defined(OC_X86_64_ASM)
# define OC_FDCT8x8 \
/*Note: xmm15={0}x8 and xmm14={-1}x8.*/ \
"#OC_FDCT8x8\n\t" \
/*Stage 1:*/ \
"movdqa %%xmm0,%%xmm11\n\t" \
"movdqa %%xmm1,%%xmm10\n\t" \
"movdqa %%xmm2,%%xmm9\n\t" \
"movdqa %%xmm3,%%xmm8\n\t" \
/*xmm11=t7'=t0-t7*/ \
"psubw %%xmm7,%%xmm11\n\t" \
/*xmm10=t6'=t1-t6*/ \
"psubw %%xmm6,%%xmm10\n\t" \
/*xmm9=t5'=t2-t5*/ \
"psubw %%xmm5,%%xmm9\n\t" \
/*xmm8=t4'=t3-t4*/ \
"psubw %%xmm4,%%xmm8\n\t" \
/*xmm0=t0'=t0+t7*/ \
"paddw %%xmm7,%%xmm0\n\t" \
/*xmm1=t1'=t1+t6*/ \
"paddw %%xmm6,%%xmm1\n\t" \
/*xmm5=t2'=t2+t5*/ \
"paddw %%xmm2,%%xmm5\n\t" \
/*xmm4=t3'=t3+t4*/ \
"paddw %%xmm3,%%xmm4\n\t" \
/*xmm2,3,6,7 are now free.*/ \
/*Stage 2:*/ \
"movdqa %%xmm0,%%xmm3\n\t" \
"mov $0x5A806A0A,%[a]\n\t" \
"movdqa %%xmm1,%%xmm2\n\t" \
"movd %[a],%%xmm13\n\t" \
"movdqa %%xmm10,%%xmm6\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
/*xmm2=t2''=t1'-t2'*/ \
"psubw %%xmm5,%%xmm2\n\t" \
"pxor %%xmm12,%%xmm12\n\t" \
/*xmm3=t3''=t0'-t3'*/ \
"psubw %%xmm4,%%xmm3\n\t" \
"psubw %%xmm14,%%xmm12\n\t" \
/*xmm10=t5''=t6'-t5'*/ \
"psubw %%xmm9,%%xmm10\n\t" \
"paddw %%xmm12,%%xmm12\n\t" \
/*xmm4=t0''=t0'+t3'*/ \
"paddw %%xmm0,%%xmm4\n\t" \
/*xmm1=t1''=t1'+t2'*/ \
"paddw %%xmm5,%%xmm1\n\t" \
/*xmm6=t6''=t6'+t5'*/ \
"paddw %%xmm9,%%xmm6\n\t" \
/*xmm0,xmm5,xmm9 are now free.*/ \
/*Stage 3:*/ \
/*xmm10:xmm5=t5''*27146+0xB500 \
xmm0=t5''*/ \
"movdqa %%xmm10,%%xmm5\n\t" \
"movdqa %%xmm10,%%xmm0\n\t" \
"punpckhwd %%xmm12,%%xmm10\n\t" \
"pmaddwd %%xmm13,%%xmm10\n\t" \
"punpcklwd %%xmm12,%%xmm5\n\t" \
"pmaddwd %%xmm13,%%xmm5\n\t" \
/*xmm5=(t5''*27146+0xB500>>16)+t5''*/ \
"psrad $16,%%xmm10\n\t" \
"psrad $16,%%xmm5\n\t" \
"packssdw %%xmm10,%%xmm5\n\t" \
"paddw %%xmm0,%%xmm5\n\t" \
/*xmm0=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \
"pcmpeqw %%xmm15,%%xmm0\n\t" \
"psubw %%xmm14,%%xmm0\n\t" \
"paddw %%xmm5,%%xmm0\n\t" \
"movdqa %%xmm8,%%xmm5\n\t" \
"psraw $1,%%xmm0\n\t" \
/*xmm5=t5'''=t4'-s*/ \
"psubw %%xmm0,%%xmm5\n\t" \
/*xmm8=t4''=t4'+s*/ \
"paddw %%xmm0,%%xmm8\n\t" \
/*xmm0,xmm7,xmm9,xmm10 are free.*/ \
/*xmm7:xmm9=t6''*27146+0xB500*/ \
"movdqa %%xmm6,%%xmm7\n\t" \
"movdqa %%xmm6,%%xmm9\n\t" \
"punpckhwd %%xmm12,%%xmm7\n\t" \
"pmaddwd %%xmm13,%%xmm7\n\t" \
"punpcklwd %%xmm12,%%xmm9\n\t" \
"pmaddwd %%xmm13,%%xmm9\n\t" \
/*xmm9=(t6''*27146+0xB500>>16)+t6''*/ \
"psrad $16,%%xmm7\n\t" \
"psrad $16,%%xmm9\n\t" \
"packssdw %%xmm7,%%xmm9\n\t" \
"paddw %%xmm6,%%xmm9\n\t" \
/*xmm9=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \
"pcmpeqw %%xmm15,%%xmm6\n\t" \
"psubw %%xmm14,%%xmm6\n\t" \
"paddw %%xmm6,%%xmm9\n\t" \
"movdqa %%xmm11,%%xmm7\n\t" \
"psraw $1,%%xmm9\n\t" \
/*xmm7=t6'''=t7'-s*/ \
"psubw %%xmm9,%%xmm7\n\t" \
/*xmm9=t7''=t7'+s*/ \
"paddw %%xmm11,%%xmm9\n\t" \
/*xmm0,xmm6,xmm10,xmm11 are free.*/ \
/*Stage 4:*/ \
/*xmm10:xmm0=t1''*27146+0xB500*/ \
"movdqa %%xmm1,%%xmm0\n\t" \
"movdqa %%xmm1,%%xmm10\n\t" \
"punpcklwd %%xmm12,%%xmm0\n\t" \
"pmaddwd %%xmm13,%%xmm0\n\t" \
"punpckhwd %%xmm12,%%xmm10\n\t" \
"pmaddwd %%xmm13,%%xmm10\n\t" \
/*xmm0=(t1''*27146+0xB500>>16)+t1''*/ \
"psrad $16,%%xmm0\n\t" \
"psrad $16,%%xmm10\n\t" \
"mov $0x20006A0A,%[a]\n\t" \
"packssdw %%xmm10,%%xmm0\n\t" \
"movd %[a],%%xmm13\n\t" \
"paddw %%xmm1,%%xmm0\n\t" \
/*xmm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \
"pcmpeqw %%xmm15,%%xmm1\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"psubw %%xmm14,%%xmm1\n\t" \
"paddw %%xmm1,%%xmm0\n\t" \
/*xmm10:xmm4=t0''*27146+0x4000*/ \
"movdqa %%xmm4,%%xmm1\n\t" \
"movdqa %%xmm4,%%xmm10\n\t" \
"punpcklwd %%xmm12,%%xmm4\n\t" \
"pmaddwd %%xmm13,%%xmm4\n\t" \
"punpckhwd %%xmm12,%%xmm10\n\t" \
"pmaddwd %%xmm13,%%xmm10\n\t" \
/*xmm4=(t0''*27146+0x4000>>16)+t0''*/ \
"psrad $16,%%xmm4\n\t" \
"psrad $16,%%xmm10\n\t" \
"mov $0x6CB7,%[a]\n\t" \
"packssdw %%xmm10,%%xmm4\n\t" \
"movd %[a],%%xmm12\n\t" \
"paddw %%xmm1,%%xmm4\n\t" \
/*xmm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \
"pcmpeqw %%xmm15,%%xmm1\n\t" \
"pshufd $00,%%xmm12,%%xmm12\n\t" \
"psubw %%xmm14,%%xmm1\n\t" \
"mov $0x7FFF6C84,%[a]\n\t" \
"paddw %%xmm1,%%xmm4\n\t" \
/*xmm0=_y[0]=u=r+s>>1 \
The naive implementation could cause overflow, so we use \
u=(r&s)+((r^s)>>1).*/ \
"movdqa %%xmm0,%%xmm6\n\t" \
"pxor %%xmm4,%%xmm0\n\t" \
"pand %%xmm4,%%xmm6\n\t" \
"psraw $1,%%xmm0\n\t" \
"movd %[a],%%xmm13\n\t" \
"paddw %%xmm6,%%xmm0\n\t" \
/*xmm4=_y[4]=v=r-u*/ \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"psubw %%xmm0,%%xmm4\n\t" \
/*xmm1,xmm6,xmm10,xmm11 are free.*/ \
/*xmm6:xmm10=60547*t3''+0x6CB7*/ \
"movdqa %%xmm3,%%xmm10\n\t" \
"movdqa %%xmm3,%%xmm6\n\t" \
"punpcklwd %%xmm3,%%xmm10\n\t" \
"pmaddwd %%xmm13,%%xmm10\n\t" \
"mov $0x61F861F8,%[a]\n\t" \
"punpckhwd %%xmm3,%%xmm6\n\t" \
"pmaddwd %%xmm13,%%xmm6\n\t" \
"movd %[a],%%xmm13\n\t" \
"paddd %%xmm12,%%xmm10\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"paddd %%xmm12,%%xmm6\n\t" \
/*xmm1:xmm2=25080*t2'' \
xmm12=t2''*/ \
"movdqa %%xmm2,%%xmm11\n\t" \
"movdqa %%xmm2,%%xmm12\n\t" \
"pmullw %%xmm13,%%xmm2\n\t" \
"pmulhw %%xmm13,%%xmm11\n\t" \
"movdqa %%xmm2,%%xmm1\n\t" \
"punpcklwd %%xmm11,%%xmm2\n\t" \
"punpckhwd %%xmm11,%%xmm1\n\t" \
/*xmm10=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \
"paddd %%xmm2,%%xmm10\n\t" \
"paddd %%xmm1,%%xmm6\n\t" \
"psrad $16,%%xmm10\n\t" \
"pcmpeqw %%xmm15,%%xmm3\n\t" \
"psrad $16,%%xmm6\n\t" \
"psubw %%xmm14,%%xmm3\n\t" \
"packssdw %%xmm6,%%xmm10\n\t" \
"paddw %%xmm3,%%xmm10\n\t" \
/*xmm2=_y[2]=u \
xmm10=s=(25080*u>>16)-t2''*/ \
"movdqa %%xmm10,%%xmm2\n\t" \
"pmulhw %%xmm13,%%xmm10\n\t" \
"psubw %%xmm12,%%xmm10\n\t" \
/*xmm1:xmm6=s*21600+0x2800*/ \
"pxor %%xmm12,%%xmm12\n\t" \
"psubw %%xmm14,%%xmm12\n\t" \
"mov $0x28005460,%[a]\n\t" \
"movd %[a],%%xmm13\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"movdqa %%xmm10,%%xmm6\n\t" \
"movdqa %%xmm10,%%xmm1\n\t" \
"punpcklwd %%xmm12,%%xmm6\n\t" \
"pmaddwd %%xmm13,%%xmm6\n\t" \
"mov $0x0E3D,%[a]\n\t" \
"punpckhwd %%xmm12,%%xmm1\n\t" \
"pmaddwd %%xmm13,%%xmm1\n\t" \
/*xmm6=(s*21600+0x2800>>18)+s*/ \
"psrad $18,%%xmm6\n\t" \
"psrad $18,%%xmm1\n\t" \
"movd %[a],%%xmm12\n\t" \
"packssdw %%xmm1,%%xmm6\n\t" \
"pshufd $00,%%xmm12,%%xmm12\n\t" \
"paddw %%xmm10,%%xmm6\n\t" \
/*xmm6=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \
"mov $0x7FFF54DC,%[a]\n\t" \
"pcmpeqw %%xmm15,%%xmm10\n\t" \
"movd %[a],%%xmm13\n\t" \
"psubw %%xmm14,%%xmm10\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"paddw %%xmm10,%%xmm6\n\t " \
/*xmm1,xmm3,xmm10,xmm11 are free.*/ \
/*xmm11:xmm10=54491*t5'''+0x0E3D*/ \
"movdqa %%xmm5,%%xmm10\n\t" \
"movdqa %%xmm5,%%xmm11\n\t" \
"punpcklwd %%xmm5,%%xmm10\n\t" \
"pmaddwd %%xmm13,%%xmm10\n\t" \
"mov $0x8E3A8E3A,%[a]\n\t" \
"punpckhwd %%xmm5,%%xmm11\n\t" \
"pmaddwd %%xmm13,%%xmm11\n\t" \
"movd %[a],%%xmm13\n\t" \
"paddd %%xmm12,%%xmm10\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"paddd %%xmm12,%%xmm11\n\t" \
/*xmm7:xmm12=36410*t6''' \
xmm1=t6'''*/ \
"movdqa %%xmm7,%%xmm3\n\t" \
"movdqa %%xmm7,%%xmm1\n\t" \
"pmulhw %%xmm13,%%xmm3\n\t" \
"pmullw %%xmm13,%%xmm7\n\t" \
"paddw %%xmm1,%%xmm3\n\t" \
"movdqa %%xmm7,%%xmm12\n\t" \
"punpckhwd %%xmm3,%%xmm7\n\t" \
"punpcklwd %%xmm3,%%xmm12\n\t" \
/*xmm10=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \
"paddd %%xmm12,%%xmm10\n\t" \
"paddd %%xmm7,%%xmm11\n\t" \
"psrad $16,%%xmm10\n\t" \
"pcmpeqw %%xmm15,%%xmm5\n\t" \
"psrad $16,%%xmm11\n\t" \
"psubw %%xmm14,%%xmm5\n\t" \
"packssdw %%xmm11,%%xmm10\n\t" \
"pxor %%xmm12,%%xmm12\n\t" \
"paddw %%xmm5,%%xmm10\n\t" \
/*xmm5=_y[5]=u \
xmm1=s=t6'''-(36410*u>>16)*/ \
"psubw %%xmm14,%%xmm12\n\t" \
"movdqa %%xmm10,%%xmm5\n\t" \
"mov $0x340067C8,%[a]\n\t" \
"pmulhw %%xmm13,%%xmm10\n\t" \
"movd %[a],%%xmm13\n\t" \
"paddw %%xmm5,%%xmm10\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"psubw %%xmm10,%%xmm1\n\t" \
/*xmm11:xmm3=s*26568+0x3400*/ \
"movdqa %%xmm1,%%xmm3\n\t" \
"movdqa %%xmm1,%%xmm11\n\t" \
"punpcklwd %%xmm12,%%xmm3\n\t" \
"pmaddwd %%xmm13,%%xmm3\n\t" \
"mov $0x7B1B,%[a]\n\t" \
"punpckhwd %%xmm12,%%xmm11\n\t" \
"pmaddwd %%xmm13,%%xmm11\n\t" \
/*xmm3=(s*26568+0x3400>>17)+s*/ \
"psrad $17,%%xmm3\n\t" \
"psrad $17,%%xmm11\n\t" \
"movd %[a],%%xmm12\n\t" \
"packssdw %%xmm11,%%xmm3\n\t" \
"pshufd $00,%%xmm12,%%xmm12\n\t" \
"paddw %%xmm1,%%xmm3\n\t" \
/*xmm3=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \
"mov $0x7FFF7B16,%[a]\n\t" \
"pcmpeqw %%xmm15,%%xmm1\n\t" \
"movd %[a],%%xmm13\n\t" \
"psubw %%xmm14,%%xmm1\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"paddw %%xmm1,%%xmm3\n\t " \
/*xmm1,xmm7,xmm10,xmm11 are free.*/ \
/*xmm11:xmm10=64277*t7''+0x7B1B*/ \
"movdqa %%xmm9,%%xmm10\n\t" \
"movdqa %%xmm9,%%xmm11\n\t" \
"punpcklwd %%xmm9,%%xmm10\n\t" \
"pmaddwd %%xmm13,%%xmm10\n\t" \
"mov $0x31F131F1,%[a]\n\t" \
"punpckhwd %%xmm9,%%xmm11\n\t" \
"pmaddwd %%xmm13,%%xmm11\n\t" \
"movd %[a],%%xmm13\n\t" \
"paddd %%xmm12,%%xmm10\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"paddd %%xmm12,%%xmm11\n\t" \
/*xmm12:xmm7=12785*t4''*/ \
"movdqa %%xmm8,%%xmm7\n\t" \
"movdqa %%xmm8,%%xmm1\n\t" \
"pmullw %%xmm13,%%xmm7\n\t" \
"pmulhw %%xmm13,%%xmm1\n\t" \
"movdqa %%xmm7,%%xmm12\n\t" \
"punpcklwd %%xmm1,%%xmm7\n\t" \
"punpckhwd %%xmm1,%%xmm12\n\t" \
/*xmm10=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \
"paddd %%xmm7,%%xmm10\n\t" \
"paddd %%xmm12,%%xmm11\n\t" \
"psrad $16,%%xmm10\n\t" \
"pcmpeqw %%xmm15,%%xmm9\n\t" \
"psrad $16,%%xmm11\n\t" \
"psubw %%xmm14,%%xmm9\n\t" \
"packssdw %%xmm11,%%xmm10\n\t" \
"pxor %%xmm12,%%xmm12\n\t" \
"paddw %%xmm9,%%xmm10\n\t" \
/*xmm1=_y[1]=u \
xmm10=s=(12785*u>>16)-t4''*/ \
"psubw %%xmm14,%%xmm12\n\t" \
"movdqa %%xmm10,%%xmm1\n\t" \
"mov $0x3000503B,%[a]\n\t" \
"pmulhw %%xmm13,%%xmm10\n\t" \
"movd %[a],%%xmm13\n\t" \
"psubw %%xmm8,%%xmm10\n\t" \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
/*xmm8:xmm7=s*20539+0x3000*/ \
"movdqa %%xmm10,%%xmm7\n\t" \
"movdqa %%xmm10,%%xmm8\n\t" \
"punpcklwd %%xmm12,%%xmm7\n\t" \
"pmaddwd %%xmm13,%%xmm7\n\t" \
"punpckhwd %%xmm12,%%xmm8\n\t" \
"pmaddwd %%xmm13,%%xmm8\n\t" \
/*xmm7=(s*20539+0x3000>>20)+s*/ \
"psrad $20,%%xmm7\n\t" \
"psrad $20,%%xmm8\n\t" \
"packssdw %%xmm8,%%xmm7\n\t" \
"paddw %%xmm10,%%xmm7\n\t" \
/*xmm7=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \
"pcmpeqw %%xmm15,%%xmm10\n\t" \
"psubw %%xmm14,%%xmm10\n\t" \
"paddw %%xmm10,%%xmm7\n\t " \
# define OC_TRANSPOSE8x8 \
"#OC_TRANSPOSE8x8\n\t" \
"movdqa %%xmm4,%%xmm8\n\t" \
/*xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \
"punpcklwd %%xmm5,%%xmm4\n\t" \
/*xmm8 = f7 e7 f6 e6 f5 e5 f4 e4*/ \
"punpckhwd %%xmm5,%%xmm8\n\t" \
/*xmm5 is free.*/ \
"movdqa %%xmm0,%%xmm5\n\t" \
/*xmm0 = b3 a3 b2 a2 b1 a1 b0 a0*/ \
"punpcklwd %%xmm1,%%xmm0\n\t" \
/*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \
"punpckhwd %%xmm1,%%xmm5\n\t" \
/*xmm1 is free.*/ \
"movdqa %%xmm6,%%xmm1\n\t" \
/*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \
"punpcklwd %%xmm7,%%xmm6\n\t" \
/*xmm1 = h7 g7 h6 g6 h5 g5 h4 g4*/ \
"punpckhwd %%xmm7,%%xmm1\n\t" \
/*xmm7 is free.*/ \
"movdqa %%xmm2,%%xmm7\n\t" \
/*xmm7 = d3 c3 d2 c2 d1 c1 d0 c0*/ \
"punpcklwd %%xmm3,%%xmm7\n\t" \
/*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \
"punpckhwd %%xmm3,%%xmm2\n\t" \
/*xmm3 is free.*/ \
"movdqa %%xmm0,%%xmm3\n\t" \
/*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \
"punpckldq %%xmm7,%%xmm0\n\t" \
/*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \
"punpckhdq %%xmm7,%%xmm3\n\t" \
/*xmm7 is free.*/ \
"movdqa %%xmm5,%%xmm7\n\t" \
/*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \
"punpckldq %%xmm2,%%xmm5\n\t" \
/*xmm7 = d7 c7 b7 a7 d6 c6 b6 a6*/ \
"punpckhdq %%xmm2,%%xmm7\n\t" \
/*xmm2 is free.*/ \
"movdqa %%xmm4,%%xmm2\n\t" \
/*xmm2 = h1 g1 f1 e1 h0 g0 f0 e0*/ \
"punpckldq %%xmm6,%%xmm2\n\t" \
/*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \
"punpckhdq %%xmm6,%%xmm4\n\t" \
/*xmm6 is free.*/ \
"movdqa %%xmm8,%%xmm6\n\t" \
/*xmm6 = h5 g5 f5 e5 h4 g4 f4 e4*/ \
"punpckldq %%xmm1,%%xmm6\n\t" \
/*xmm8 = h7 g7 f7 e7 h6 g6 f6 e6*/ \
"punpckhdq %%xmm1,%%xmm8\n\t" \
/*xmm1 is free.*/ \
"movdqa %%xmm0,%%xmm1\n\t" \
/*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \
"punpcklqdq %%xmm2,%%xmm0\n\t" \
/*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \
"punpckhqdq %%xmm2,%%xmm1\n\t" \
/*xmm2 is free.*/ \
"movdqa %%xmm3,%%xmm2\n\t" \
/*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \
"punpcklqdq %%xmm4,%%xmm2\n\t" \
/*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \
"punpckhqdq %%xmm4,%%xmm3\n\t" \
/*xmm4 is free.*/ \
"movdqa %%xmm5,%%xmm4\n\t" \
/*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \
"punpcklqdq %%xmm6,%%xmm4\n\t" \
/*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \
"punpckhqdq %%xmm6,%%xmm5\n\t" \
/*xmm6 is free.*/ \
"movdqa %%xmm7,%%xmm6\n\t" \
/*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \
"punpcklqdq %%xmm8,%%xmm6\n\t" \
/*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \
"punpckhqdq %%xmm8,%%xmm7\n\t" \
/*xmm8 is free.*/ \
/*SSE2 implementation of the fDCT for x86-64 only.
Because of the 8 extra XMM registers on x86-64, this version can operate
without any temporary stack access at all.
The whole 8x8 block is kept in xmm0...xmm7: it is scaled up by 4, biased,
transformed along one dimension, transposed, transformed along the other,
transposed back, and finally rounded and scaled back down by 4.
_y: The 64 output coefficients; written with movdqa, so the buffer must be
16-byte aligned.
_x: The 64 input values; read with movdqa, so the buffer must be 16-byte
aligned.*/
void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
/*Scratch general-purpose register used to load immediate constants.*/
ptrdiff_t a;
__asm__ __volatile__(
/*Load the input.*/
"movdqa 0x00(%[x]),%%xmm0\n\t"
"movdqa 0x10(%[x]),%%xmm1\n\t"
"movdqa 0x20(%[x]),%%xmm2\n\t"
"movdqa 0x30(%[x]),%%xmm3\n\t"
"movdqa 0x40(%[x]),%%xmm4\n\t"
"movdqa 0x50(%[x]),%%xmm5\n\t"
"movdqa 0x60(%[x]),%%xmm6\n\t"
"movdqa 0x70(%[x]),%%xmm7\n\t"
/*Add two extra bits of working precision to improve accuracy; any more and
we could overflow.*/
/*We also add a few biases to correct for some systematic error that
remains in the full fDCT->iDCT round trip.*/
/*The psllw $2 instructions that scale each row are interleaved with the
bias computation below.*/
/*xmm15={0}x8*/
"pxor %%xmm15,%%xmm15\n\t"
/*xmm14={-1}x8*/
"pcmpeqb %%xmm14,%%xmm14\n\t"
"psllw $2,%%xmm0\n\t"
/*xmm8=xmm0*/
"movdqa %%xmm0,%%xmm8\n\t"
"psllw $2,%%xmm1\n\t"
/*xmm8={_x[7...0]==0}*/
"pcmpeqw %%xmm15,%%xmm8\n\t"
"psllw $2,%%xmm2\n\t"
/*xmm8={_x[7...0]!=0}*/
"psubw %%xmm14,%%xmm8\n\t"
"psllw $2,%%xmm3\n\t"
/*%[a]=1*/
"mov $1,%[a]\n\t"
/*xmm8={_x[6]!=0,0,_x[4]!=0,0,_x[2]!=0,0,_x[0]!=0,0}*/
"pslld $16,%%xmm8\n\t"
"psllw $2,%%xmm4\n\t"
/*xmm9={0,0,0,0,0,0,0,1}*/
"movd %[a],%%xmm9\n\t"
/*xmm8={0,0,0,0,_x[2]!=0,0,_x[0]!=0,0}*/
"pshufhw $0x00,%%xmm8,%%xmm8\n\t"
"psllw $2,%%xmm5\n\t"
/*%[a]={1}x2*/
"mov $0x10001,%[a]\n\t"
/*xmm8={0,0,0,0,0,0,0,_x[0]!=0}*/
"pshuflw $0x01,%%xmm8,%%xmm8\n\t"
"psllw $2,%%xmm6\n\t"
/*xmm10={0,0,0,0,0,0,1,1}*/
"movd %[a],%%xmm10\n\t"
/*xmm0=_x[7...0]+{0,0,0,0,0,0,0,_x[0]!=0}*/
"paddw %%xmm8,%%xmm0\n\t"
"psllw $2,%%xmm7\n\t"
/*xmm0=_x[7...0]+{0,0,0,0,0,0,1,(_x[0]!=0)+1}*/
"paddw %%xmm10,%%xmm0\n\t"
/*xmm1=_x[15...8]-{0,0,0,0,0,0,0,1}*/
"psubw %%xmm9,%%xmm1\n\t"
/*Transform columns.*/
OC_FDCT8x8
/*Transform rows.*/
OC_TRANSPOSE8x8
OC_FDCT8x8
/*TODO: zig-zag ordering?*/
OC_TRANSPOSE8x8
/*Remove the two extra bits of precision with rounding: subtracting -2 adds
2 to every coefficient before the arithmetic shift right by 2.*/
/*xmm14={-2,-2,-2,-2,-2,-2,-2,-2}*/
"paddw %%xmm14,%%xmm14\n\t"
"psubw %%xmm14,%%xmm0\n\t"
"psubw %%xmm14,%%xmm1\n\t"
"psraw $2,%%xmm0\n\t"
"psubw %%xmm14,%%xmm2\n\t"
"psraw $2,%%xmm1\n\t"
"psubw %%xmm14,%%xmm3\n\t"
"psraw $2,%%xmm2\n\t"
"psubw %%xmm14,%%xmm4\n\t"
"psraw $2,%%xmm3\n\t"
"psubw %%xmm14,%%xmm5\n\t"
"psraw $2,%%xmm4\n\t"
"psubw %%xmm14,%%xmm6\n\t"
"psraw $2,%%xmm5\n\t"
"psubw %%xmm14,%%xmm7\n\t"
"psraw $2,%%xmm6\n\t"
"psraw $2,%%xmm7\n\t"
/*Store the result.*/
"movdqa %%xmm0,0x00(%[y])\n\t"
"movdqa %%xmm1,0x10(%[y])\n\t"
"movdqa %%xmm2,0x20(%[y])\n\t"
"movdqa %%xmm3,0x30(%[y])\n\t"
"movdqa %%xmm4,0x40(%[y])\n\t"
"movdqa %%xmm5,0x50(%[y])\n\t"
"movdqa %%xmm6,0x60(%[y])\n\t"
"movdqa %%xmm7,0x70(%[y])\n\t"
:[a]"=&r"(a)
:[y]"r"(_y),[x]"r"(_x)
:"memory"
);
}
#endif

View File

@ -0,0 +1,49 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86state.c 15675 2009-02-06 09:43:27Z tterribe $
********************************************************************/
#include "x86enc.h"
#if defined(OC_X86_ASM)
#include "../cpu.c"
/*Fills in the encoder's table of optimized routines for x86.
  The table is first populated by oc_enc_vtable_init_c(), and individual
   entries are then replaced according to the detected CPU features.
  _enc: The encoder context whose opt_vtable is initialized.*/
void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){
  ogg_uint32_t flags;
  flags=oc_cpu_flags_get();
  /*Start from the C versions; the checks below override a subset of them.*/
  oc_enc_vtable_init_c(_enc);
  if((flags&OC_CPU_X86_MMX)!=0){
    _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx;
    _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
    _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
    _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx;
    _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx;
  }
  if((flags&OC_CPU_X86_MMXEXT)!=0){
    _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
    _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
    _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
    _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
    _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
    _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
    _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
  }
  if((flags&OC_CPU_X86_SSE2)!=0){
    /*The x86-64 SSE2 fDCT is deliberately left disabled here, so this branch
       currently installs nothing.*/
# if defined(OC_X86_64_ASM)
    /*_enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;*/
# endif
  }
}
#endif

View File

@ -0,0 +1,47 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $
********************************************************************/
/*Declarations of the x86-specific encoder routines.
oc_enc_vtable_init_x86() installs them into the opt_vtable of an encoder
context according to the CPU features reported at run time.*/
#if !defined(_x86_x86enc_H)
# define _x86_x86enc_H (1)
# include "../encint.h"
# include "x86int.h"
/*Initializes _enc->opt_vtable with the C routines and then overrides the
entries for which an accelerated version is usable on this CPU.*/
void oc_enc_vtable_init_x86(oc_enc_ctx *_enc);
/*The *_mmxext routines below are installed when OC_CPU_X86_MMXEXT is set.*/
unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
const unsigned char *_ref,int _ystride);
unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _thresh);
unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _thresh);
unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
/*The *_mmx routines below are installed when OC_CPU_X86_MMX is set.*/
void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
const unsigned char *_x,const unsigned char *_y,int _stride);
void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
const unsigned char *_x,int _stride);
/*Installed together with the other MMXEXT routines.*/
void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
/*Installed as the fdct8x8 entry when OC_CPU_X86_MMX is set.*/
void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
/*x86-64 SSE2 fDCT.
NOTE(review): the line that would install this in oc_enc_vtable_init_x86() is
commented out, so it appears to be unused at present - confirm before relying
on it.*/
void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
#endif

View File

@ -0,0 +1,42 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
/*Declarations of the x86-specific routines shared through oc_theora_state.
oc_state_vtable_init_x86() installs all of the *_mmx routines declared here
into _state->opt_vtable when OC_CPU_X86_MMX is reported.*/
#if !defined(_x86_x86int_H)
# define _x86_x86int_H (1)
# include "../internal.h"
/*Detects the CPU features, stores them in _state->cpu_flags, and fills in
_state->opt_vtable with either the MMX routines or the C fallbacks.*/
void oc_state_vtable_init_x86(oc_theora_state *_state);
/*Per-fragment primitives.*/
void oc_frag_copy_mmx(unsigned char *_dst,
const unsigned char *_src,int _ystride);
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
const ogg_int16_t *_residue);
void oc_frag_recon_inter_mmx(unsigned char *_dst,
const unsigned char *_src,int _ystride,const ogg_int16_t *_residue);
void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi);
/*State-level routines; these take the whole state and call or inline the
primitives above.*/
void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
/*Installed as the restore_fpu entry.
NOTE(review): presumably executes emms so that x87 code can run after MMX
code - confirm against the definition.*/
void oc_restore_fpu_mmx(void);
#endif

View File

@ -0,0 +1,62 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $
********************************************************************/
#include "x86int.h"
#if defined(OC_X86_ASM)
#include "../cpu.c"
/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
   each quadrant of the destination.
  It has 128 entries: the first 64 contain every index 0...63 exactly once,
   and the remaining 64 are all set to 64.
  oc_state_vtable_init_x86() publishes it as opt_data.dct_fzig_zag when MMX is
   available.
  NOTE(review): the constant tail presumably sends zig-zag positions past the
   end of a block to a spare slot after the 64 coefficients; confirm against
   the code that reads dct_fzig_zag.*/
static const unsigned char OC_FZIG_ZAG_MMX[128]={
/*Entries 0...63: permuted coefficient indices.*/
0, 8, 1, 2, 9,16,24,17,
10, 3,32,11,18,25, 4,12,
5,26,19,40,33,34,41,48,
27, 6,13,20,28,21,14, 7,
56,49,42,35,43,50,57,36,
15,22,29,30,23,44,37,58,
51,59,38,45,52,31,60,53,
46,39,47,54,61,62,55,63,
/*Entries 64...127: all 64.*/
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
64,64,64,64,64,64,64,64,
};
/*Chooses the set of optimized routines used by _state on x86.
  The CPU feature flags returned by oc_cpu_flags_get() are cached in
   _state->cpu_flags.
  If the MMX flag is missing, setup is delegated to oc_state_vtable_init_c().
  Otherwise each opt_vtable slot is pointed at its MMX routine and
   opt_data.dct_fzig_zag is pointed at OC_FZIG_ZAG_MMX.
  _state: The state whose cpu_flags, opt_vtable and opt_data are filled in.*/
void oc_state_vtable_init_x86(oc_theora_state *_state){
  _state->cpu_flags=oc_cpu_flags_get();
  /*No MMX support: hand the whole job to the generic C initializer.*/
  if(!(_state->cpu_flags&OC_CPU_X86_MMX)){
    oc_state_vtable_init_c(_state);
    return;
  }
  /*The MMX iDCT expects the transposed zig-zag ordering.*/
  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
  /*Fragment-level primitives.*/
  _state->opt_vtable.frag_copy=oc_frag_copy_mmx;
  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
  _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
  /*State-level helpers.*/
  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
  _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx;
  _state->opt_vtable.state_loop_filter_frag_rows=
   oc_state_loop_filter_frag_rows_mmx;
  _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
}
#endif

View File

@ -61,7 +61,7 @@ TheoraDecoder::GetCodecInfo(media_codec_info *info)
status_t
TheoraDecoder::Setup(media_format *inputFormat,
const void *infoBuffer, int32 infoSize)
const void *infoBuffer, size_t infoSize)
{
TRACE("TheoraDecoder::Setup\n");
if (!format_is_compatible(theora_encoded_media_format(),*inputFormat)) {
@ -163,8 +163,8 @@ TheoraDecoder::Decode(void *buffer, int64 *frameCount,
bool synced = false;
// get a new packet
void *chunkBuffer;
int32 chunkSize;
const void *chunkBuffer;
size_t chunkSize;
media_header mh;
status = GetNextChunk(&chunkBuffer, &chunkSize, &mh);
if (status == B_LAST_BUFFER_ERROR) {
@ -181,9 +181,22 @@ TheoraDecoder::Decode(void *buffer, int64 *frameCount,
// decode the packet
{
ogg_packet * packet = static_cast<ogg_packet*>(chunkBuffer);
ogg_packet packet;
if (mh.user_data_type == OGG_PACKET_DATA_TYPE) {
memcpy(&packet, mh.user_data, sizeof(packet));
} else {
// According to http://lists.xiph.org/pipermail/theora-dev/2004-May/002161.html
// this is invalid, but results from it are tolerable and better than nothing.
TRACE("TheoraDecoder::Decode: using compatibility chunk interpretation\n");
packet.b_o_s = 0;
packet.e_o_s = 0;
packet.granulepos = -1;
packet.packetno = 7;
}
packet.packet = (unsigned char *)chunkBuffer;
packet.bytes = chunkSize;
// push the packet in and get the decoded yuv output
theora_decode_packetin(&fState, packet);
theora_decode_packetin(&fState, &packet);
yuv_buffer yuv;
theora_decode_YUVout(&fState, &yuv);
// now copy the decoded yuv output to the buffer
@ -193,9 +206,9 @@ TheoraDecoder::Decode(void *buffer, int64 *frameCount,
uint draw_bytes_per_line = yuv.y_width + yuv.uv_width*2;
uint bytes_per_line = draw_bytes_per_line;
for (uint line = 0 ; line < fOutput.display.line_count ; line++) {
char * y = yuv.y;
char * u = yuv.u;
char * v = yuv.v;
unsigned char * y = yuv.y;
unsigned char * u = yuv.u;
unsigned char * v = yuv.v;
for (uint pos = 0 ; pos < draw_bytes_per_line ; pos += 4) {
out[pos] = *(y++);
out[pos+1] = *(u++);

View File

@ -42,4 +42,5 @@ public:
status_t GetSupportedFormats(media_format ** formats, size_t * count);
};
#endif _THEORA_CODEC_PLUGIN_H_
#endif // _THEORA_CODEC_PLUGIN_H_