国产精品一区一区,久久久综合香蕉尹人综合网,久久久久久久999精品视频

BigWorld引擎初识大观

RedLight — Sat, 14 Nov 2009 05:16:00 GMT

摘要: 阅读全文

RedLight 2009-11-14 13:16 发表评论

D3D与OpenGL常用API对译

RedLight — Sat, 14 Nov 2009 04:49:00 GMT

作�ؓ(f��)一�?D�E�序�? 我用�?ji��n)OpenGL两年�? 最�q�在搞一个项�? 从OpenGL转到D3D, 虽然工程外在的框枉��装得不�? 但想完全��C��OpenGL转换到D3D, 看�v来还是有隑ֺ��? �׃��(ji��n)我两个星期的旉��, 我终于�{换过来了(ji��n)�?br>D3D与OpenGL的几�Ҏ(gu��)��较明显不同的地方:
(一)、正交投影时�Q�OpenGL以屏�q�左上角�?0,0), 而D3D却以屏幕中心(j��)�?0,0)
(�?、OpenGL使用��x��坐标�p�，而D3D使用左手坐标�p?br>(�?、OpenGL使用旋�{操作�{��{入的角度参数�?角度, 而D3D�?弧度�Q�所以注意要PI * Angle / 180

下面我把具体地API对照关系列出�?不是很全�Q�以后添加中.......)

1. 坐标变换
pos = D3DXVECTOR3(0,2,-1.5);
at = D3DXVECTOR3(0,0,0);
up = D3DXVECTOR3(0,1,0);
D3DXMatrixLookAtLH(&view,&pos,&at,&up);
pd3dDevice->SetTransform(D3DTS_VIEW,&view);

2. 绘制
pd3dDevice->SetRenderState(D3DRS_FILLMODE,D3DFILL_WIREFRAME);

DrawPrimitive()
DrawIndexedPrimitive()

DrawPrimitiveUP()
DrawIndexedPrimitiveUP()
3. 颜色

4. 片段测试

(1) 深度测试
g_pDevice->SetRenderState(D3DRS_ZENABLE, TRUE);           //glEnable(GL_DEPTH_TEST);
g_pDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);    //glDepthFunc(GL_LEQUAL);
//--------------------------------------------------------------------------------------------------------
g_pDevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_CCW);      //glEnable(GL_CULL_FACE);

(2) Alpha测试
//--------------------------------------------------------------------------------------------------------
g_pDevice->SetRenderState(D3DRS_ALPHATESTENABLE, TRUE);                //glEnable(GL_ALPHA_TEST);
g_pDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_GREATER);    //glAlphaFunc(GL_GREATER, 0.1f);
g_pDevice->SetRenderState(D3DRS_ALPHAREF, (DWORD)(0.1f * 255)); //取值范围 0 ~ 255

(3) 剪裁测试 (平面剪切)
//--------------------------------------------------------------------------------------------------------
// Enable clip plane for reflection map
CMatrix44f pWorldViewProjIT=m_pWorldViewProj;
//pWorldViewProjIT.Transpose();
pWorldViewProjIT.Invert();

// Transform plane to clip-space
float pClipSpacePlane[4];
float pClipPlane[]= { 0, 0, 1, 0};

// Check if camera is below water surface, if so invert clip plane
CVector3f pEye=(CVector3f)m_pCamera.GetPosition();
if(-pEye.m_fZ<0.0)
{
pClipPlane[2]=-pClipPlane[2];
}

MatrixTransformPlane(pClipSpacePlane, pClipPlane, pWorldViewProjIT);

// enable clip plane now
g_pDevice->SetClipPlane(0, pClipSpacePlane);
g_pDevice->SetRenderState(D3DRS_CLIPPLANEENABLE, 1);

(4) 模板测试
//--------------------------------------------------------------------------------------------------------
g_pDevice->SetRenderState(D3DRS_STENCILENABLE, TRUE);
    g_pDevice->SetRenderState(D3DRS_STENCILFUNC, D3DCMP_ALWAYS);
    g_pDevice->SetRenderState(D3DRS_STENCILREF, 0x1); //取值范围 0 ~ 255

    g_pDevice->SetRenderState(D3DRS_STENCILPASS, D3DSTENCILOP_KEEP);

5. 纹理操作

g_pDevice->SetSamplerState( 0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
g_pDevice->SetSamplerState( 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
g_pDevice->SetSamplerState( 0, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR);

g_pDevice->SetSamplerState( 0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  g_pDevice->SetSamplerState( 0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

6. 缓冲区操作
(1) 颜色缓冲
//--------------------------------------------------------------------------------------------------------
g_pDevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
g_pDevice->SetRenderState(D3DRS_COLORWRITEENABLE, 0x000000F);

(2) 深度缓冲
//--------------------------------------------------------------------------------------------------------
g_pDevice->SetRenderState(D3DRS_ZENABLE, TRUE);           //glEnable(GL_DEPTH_TEST);
g_pDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE);        //glDepthMask(GL_TRUE);

(3) 模板缓冲
//--------------------------------------------------------------------------------------------------------

(4) 渲染到纹理
//--------------------------------------------------------------------------------------------------------
// Render targets
IDirect3DSurface9 *m_plD3DBackbufferSurf,
*m_plD3DDepthStencilSurfAA,
*m_plD3DDepthStencilSurf;

CRenderTarget *m_pRTRefraction, *m_pRTReflection; //(自定义纹理类)

//-----------------------------------------------------------------------------------

// Get backbuffer
g_pDevice->GetRenderTarget(0, &m_plD3DBackbufferSurf);

// Get depthstencil
g_pDevice->GetDepthStencilSurface(&m_plD3DDepthStencilSurfAA);

// Restore previous states
g_pDevice->SetRenderTarget(0, m_plD3DBackbufferSurf);
g_pDevice->SetDepthStencilSurface(m_plD3DDepthStencilSurfAA);

// (1)折射�?-------------------------------------------------------------------------

//下面的语句调用了(ji��n) g_pDevice->CreateRenderTarget(iWidth, iHeight, (D3DFORMAT) iFormat, (D3DMULTISAMPLE_TYPE)iAASamples, 0, 0, &m_plD3Surf, 0));
if(FAILED(m_pRTRefraction->Create(m_fWidth>>1, m_fHeight>>1, D3DFMT_A8R8G8B8)))
{
  return APP_ERR_INITFAIL;
}

// Create depthstencil withouth multisampling
g_pDevice->CreateDepthStencilSurface(m_fWidth, m_fHeight, D3DFMT_D24X8, (D3DMULTISAMPLE_TYPE)0, 0, 0, &m_plD3DDepthStencilSurf, 0);

g_pDevice->SetRenderTarget(0, m_pRTReflection->GetSurface());

g_pDevice->StretchRect(m_plD3DBackbufferSurf, 0, m_pRTRefraction->GetSurface(), 0, D3DTEXF_NONE);

// (2)反射�?----------------------------------------------------------------------------------
m_pRTReflection=new CRenderTarget;
if(FAILED(m_pRTReflection->Create(m_fWidth>>2, m_fHeight>>2, D3DFMT_A8R8G8B8)))
{
  return APP_ERR_INITFAIL;
}

g_pDevice->SetRenderTarget(0, m_pRTReflection->GetSurface());

//-----------------------------------------------------------------------------------
g_pDevice->SetRenderTarget(0, m_pRTReflection->GetSurface());
g_pDevice->SetDepthStencilSurface(m_plD3DDepthStencilSurf);
g_pDevice->Clear(0, 0, D3DCLEAR_TARGET|D3DCLEAR_ZBUFFER, D3DCOLOR_ARGB(255, 0, 0, 128), 1.0f, 0);
SetViewport(m_pRTReflection->GetWidth(), m_pRTReflection->GetHeight());
//-----------------------------------------------------------------------------------

D3DXSaveTextureToFile("imageTex.jpg",D3DXIFF_JPG,(IDirect3DTexture9*)m_pWavesBump->GetTexture(),NULL);

7. 混合操作
g_pDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);    //glDisable(GL_BLEND);
g_pDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);  //glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
g_pDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA);

8. 灯光与材质
g_pDevice->SetRenderState(D3DRS_LIGHTING, FALSE); //glDisable(GL_LIGHTING);

D3DMATERIAL9 mtrl;
mtrl.Ambient = a;
mtrl.Diffuse = d;
mtrl.Specular = s;
mtrl.Emissive = e;
mtrl.Power = p;
g_pDevice->SetMaterial(&mtrl); //在设置纹理前设定
//设置当前使用的纹理

RedLight 2009-11-14 12:49 发表评论

D3D常用API

RedLight — Thu, 17 Sep 2009 07:55:00 GMT

D3D API大全：全部函数

//Direct3D 9.0 SDK 开发参考Direct3D 9.0 SDK 文档 (中文�?
词汇�?
DirectX 8 教程
你也可以�?COM 对象��想象成一套�ؓ(f��)某个主题而设计的一整套库函数。DX ��提供了(ji��n)一套完整的设计3D游戏的库�?
http://baike.baidu.com/view/1169027.htm
使用DirectX的不同组�Ӟ��你需要链接不同的�?r��n)态库。例如你要��用DirectDraw�l��g�Q�你��需要ddraw.lib�?
对于DirectDraw�Q�这个头文�g是ddraw.h�?/com�~�程
D3D.H
http://wowe1314.blog.163.com/blog/static/2358876200751191213583/
在Direct3D�~�程中，我们要做的工作基本上可以归纳为：(x��)
调用适当的函数获取接口指针；
调用接口的方法（成员函数�Q�来完成所需功能�Q?
用完接口后，调用Release�Ҏ(gu��)��q�行“释放”�Q�注意释��N��序应该和获取它们的顺序相反�?
http://www.lihuasoft.net/article/show.php?id=2928
Microsoft_DirectX_9.0c里的 9个DirectX的DLL
DX9和DX10在渲染流水线上都是有天壤之别的，好在DX高版本开发包�q�行库中包含�?ji��n)对低版本开发包�q�行库的实现�Q�所以用DX8开发的�E�序�Q�DX9�q�行库也能够很好的支持，在安装有D9�q�行库的�pȝ��上跑DX8开发的�E�序不需要再安装DX8�q�行库，但是�q�个兼容性支持在最�q�被微��Y逐渐攑ּ��Q�有时候DX9的不同更新版本做的程序也不能向下兼容�Q�比如DX9FEB2007SDK,同DX9AUG2006SDK在shader�~�译规则上也是不同的�Q?007攑ּ��?ji��n)VS2.0和PS2.0以下版本shader的支持，同时对于HLSL�?include相对路径引用的默认根目录也是有区别的.openGL的shader扩展不同的厂商有不同的扩展开发包�Q�但是这�U�情况随着GLSL和openGL2.0的出现有所改观.同时OpenGL是跨�q�_��的而DX不是�Q�这意味着用OpenGL和GNU   C++规则开发的�E�序可以同时在Linux,unix和安装有GNU环境的Windows上同时运行。从效率上来看，DX�׃��数据时批量写入显存的�Q�同OpenGL的单条函数写入来讲DX效率上要高一些，不过�q�来OpenGL也支持了(ji��n)批写入，只是支持批写入的OpenGL攑ּ��?ji��n)openGL一惯的优势也就是语�a�架构上的��z��得函数的数目变得很冗杂。在效果上看DX9同支持GLSL或CG扩展的openGL可以实现相同的显�C�效果。但是有一点不同是DXUT和D3DX在一些基��l�制上比glu和openGL   ARB   Extend要差一点，比如�l�制虚线�Q�DX没有好的函数可以是实现这一功能。但是DX的扩展工��h��openGL扩展工具又有多余的优势比如向量计��?GUI控�g,mesh优化和曲面展开�Q�PRT预计��等�{�和性能��试�{�等上又要强一炏V��DX10同OpenGL比较?y��u)��感觉openGL不是同一个数量��上的产品�Q�DX10在渲染流水线和架构上和能够实现的效果上要比DX9和openGL�q�步的多。要做面向未来的游戏产品��量�q�是用DX10吧�?

LPDIRECT3D9 D3D��L��?
LPDIRECT3DDEVICE9 D3D��g��L��?
LPDIRECT3DVERTEXBUFFER9 顶点缓冲区接口
LPD3DVIEWPORT9 视口接口
LPD3DDISPLAYMODE D3D讑֤�昄��模式接口
LPD3DPRESENT_PARAMETERS 创徏�l�构用来保存讑֤�的显�C�模式接�?
LPD3DXVECTOR3 3D向量接口
LPDIRECT3DTEXTURE9 �U�理接口
ID3DXSprite �_��接口
g.pvb 成员函数
g_pD3D 成员函数
g_pD3D->GetAdapterDisplayMode(D3DADAPTER_DEFAULT,&d3ddm) 获取昄��模式
g_pd3dDevice 成员函数
g_pd3dDevice->SetRenderState�Q�，BOOL�Q?是否开启灯�?
g_pd3dDevice->SetTransform( D3DTS_WORLD, &(matWorld * matWorld_x));//��上面计��出来的旋�{矩阵�Q�设�|��ؓ(f��)世界变换矩阵
g_pd3dDevice->SetStreamSource( 0, g_pVB, 0, sizeof(CUSTOMVERTEX) );写入��?
g_pd3dDevice->SetFVF() 讄��FVF
g_pd3dDevice->DrawIndexedPrimitive( ��dŞ�? 0, 0, 点个�? 0, 三角形个�?); �?
timeGetTime 当前�pȝ��的时�?
IDirect3DTexture9 纹理接口
BITMAPFILEHEADER 文件头
BITMAPINFOHEADER 信息头
fread 读取一个数据块
biBitCount 每个像素占的位数
biCompression 是否被压缩
fseek 按偏移量移动文件指针
CreateTexture 创建一个空纹理
D3DLOCKED_RECT 锁定区域结构体
SetTextureStageState 设置纹理操作
CONSTD3DMATRIX* �q�行变换的变换矩�?
�l�构�?
D3DPRESENT_PARAMETERS 存储D3D讑֤�信息
D3DXMATRIX 修改矩阵
数组
CUSTOMVERTEX 讄��点位置颜色信息

矩阵函数
D3DXMATRIX * D3DXMatrixIdentity�Q�POut,pM�Q?单位矩阵
D3DXMATRIX * D3DXMatrixTranspose�Q�上�Q?nbsp; 矩阵转置
D3DXMATRIX * D3DXMatrixInverse�Q�上中间加个FLOAT�Q?逆矩�?
D3DXMATRIX * D3DXMatrixTransformation�Q�）(j��)
D3DXMATRIX* D3DXMatrixTranslation�Q�输出矩阵，X�Q�Y�Q�Z�Q?�q�移变换
D3DXMATRIX * D3DXMatrixScaling�Q�上�Q?�~�放变换
FLOAT D3DXPlaneDotCoord�Q�pp,pv�Q?点和�q�面之见的关�p?
D3DXPLANE * D3DXPlaneFromPointNormal�Q�POUT�Q�PPOINT�Q�PNORMAL�Q?构造子
D3DXPLANE * D3DXPlaneFromPoints(Pout,pv1,pv2,pv3) 通过�Ҏ(gu��)��描述�q�面
D3DXPLANE * D3DXPlaneNormalize(POUT,PP) 标准化一个平面
D3DXPLANE * D3DXPlaneTransform(POUT,PP,PM) �q�移�q�面
D3DXM
转换函数
D3DXMATRIX* D3DXMatrixLookAtLH(输出用于视图转换的矩阵，摄象机的位置�Q�摄象机面向的位�|�，摄象机的正方�? 视图转换的矩�?
D3DXMATRIX* D3DXMatrixOrthoLH(输出用于正交投媄(ji��ng)的交换矩阵，取景宽，取景高，取景��L��象机的最�q�距��，取景��L��象机的最�q�距��? 正交投媄(ji��ng)变换矩阵
D3DXMATRIX* D3DXMatrixPerspectiveFovLH(输出用于透视投媄(ji��ng)的交换矩阵，摄象机镜头的夹角Y�Q��^截台体的�U�|��比，�q��^截面的距��，�q��^截面的距��? 透视投媄(ji��ng)的矩�?
Direct3DCreate9(D3D版本) 创徏D3D对象
讑֤�函数
SetTransform(变换的类型，变换的变换矩�? 讄��左手或右手坐�?
SetViewport(视口指针) 讄��q�近距离
GetClientRect(hWnd,RECT) 获取�H�口�l�图区域
memcpy�Q�指针，数组�Q�长度）(j��) 拯��
SetStreamSource(0,G.pvb接口指针�Q?,长度) 数据��?
GetAdapterDisplayMode(指定昄��卡序列号�Q�存储显�C�模式的指针) 获取昑֍�的模�?
HRESULT CreateDevice(显卡序列号，D3D设备类型，所属窗口句柄，D3D进行3D运算的方式，存储D3D设备相关信息的指针，返回D3D设备接口指针的地址) 创建设备接口
HRESULT CreateVertexBuffer(��点�~�冲区大��（字节�Q�，��点�~�冲区属性，灉|��点格式�Q�顶点缓冲区内存位置�Q�顶点缓冲区指针地址�Q�保留参数通常�?) 创徏��点�~�冲
HRESULT CreateIndexBuffer(索引�~�冲区大��（字节�Q?��点�~�冲区属�?FMT颜色,��点�~�冲区内存位�|?索引�~�冲区指针地址�Q�保留参数通常�?)   创徏索引�~�冲
HRESULT Lock(加锁内存起始地址，加锁内存大小，返回内存指针地址，加锁属性) 加锁内存
HRESULT Unlock() 解锁
HRESULT SetStreamSource(渲染数据��序列号�Q�进行绑定连接的��点�~�冲区指针，�q�行�l�定�q�接渲染数据��的起始位置�Q�渲染数据流中一个顶�Ҏ(gu��)��占的内存大小) ��点�~�冲区和渲染数据��连�?
HRESULT SetFVF(灉|��点格式) 讄��点格式
HRESULT DrawPrimitive(�l�制的图元类型，�l�制的开始顶点的索引��|��l�制的图元数�? ��d��后向�~�冲�?
HRESULT DrawPrimitiveup() 可以直接�?
HRESULT Present(复制源的矩形区域指针，复制目的地的矩形区域指针，D3D设备窗口句柄，最小更新区域指针) 屏幕翻转
HRESULT SetIndices(使用的烦(ch��)引缓冲区指针) 讄��当前�l�制的烦(ch��)引数�l?
DrawIndexedPrimitive(囑օ��c�d��Q�绘制到的烦(ch��)引缓冲区的开始地址�Q�最��的索引数组元素的��|��点的数目，开始的索引数组元素的��|��l�制的数�? 同DrawPrimitive()

�l�制函数
HRESULT DrawPrimitive(基本囑օ��c�d��Q��v始顶点，�l�制的图元的数量) 囑օ��l�制
HRESULT Clear(清除的矩形区域数量，清除的矩形区域数组指针，清除哪个缓冲区，清除后重置的颜色，清除后重置的深度（0-1.0），重置的模板值) 清空图形绘制区
HRESULT BeginScene() 开始绘�?
HRESULT EndScene() �l�束�l�制

�U�理函数
CreateTexture() 创徏D3D�U�理对象
LoadBmpTexture() 装载纹理函数
LoadBmpTexture24Bit (LPDIRECT3DDEVICE9 pDevice,LPCSTR pSrcFile,LPDIRECT3DTEXTURE9 ppTexture) 24位纹�?
D3DXCreateTextureFromFile(D3D讑֤�指针�Q�纹理图形文�Ӟ��存储D3D文理的指针地址) 直接从磁盘获取纹�?
D3DXCreateTextureFromFileEx(D3D讑֤�指针�Q�纹理图形文�Ӟ��指定�U�理宽，高，指定渐进�U�理序列�U�数�Q�纹理��用方式一般�ؓ(f��)0�Q�指定纹理图形格式，�U�理存放的内存类型一般位�?�Q�纹理过滤方式，自动生成的纹理序列过滤方式，讄��透明�Ԍ��囑�Ş文�g信息存放地址可设�|?�Q�调色板存储地址�Q�创建的D3D文理的指针地址) 高��获取�U�理
HRESULT SetTexture(多��U�理的烦(ch��)�?-7�Q�D3D的纹理接口指�? 讄��当前要渲染的�U�理
HRESULT SetTextureStageState(多��U�理的烦(ch��)引，�U�理渲染状态的�c�d��Q�纹理渲染状态的��|��与类型相对应) 讄��U�理的渲染状�?
HRESULT SetSamplerState(指定�U�理属�?-7�Q�纹理采样属性类型，讄��U�理采样属�? �U�理采样
HRESULT CheckDeviceFormat(指定昑֍�序列�?D3D讑֤��c�d��,指定昄��模式格式�Q�缓冲区属性，需要��用查询的格式的设备类�?需要查询的昄��格式) �U�理压羃
HRESULT LockRect(指定加锁的纹理��别，指向D3DLOCKED_RECT�l�构�Q�要加锁的RECT区域-0代表整个区域�Q�加锁类�?�?或下表的�? 锁定�U�理
HRESULT UnlockRect(解锁的纹理��? 解锁�U�理
向量函数
FLOAT D3DXVec3Length(V) 向量模的计算
D3DXVECTOR3 * D3DXVec3Normalize�Q�返回指针，V�Q?单位�?
D3DXVECTOR3 * D3DXVec3Add(�q�回的指针，u,v) 向量加法
D3DXVECTOR3 * D3DXVec3Subtract�Q�同上）(j��) 减法
D3DXVECTOR3 * D3DXVec3Cross(同上) 向量X�?
D3DXVECTOR3 * D3DXVec3Lerp(同上) ��C��
D3DXVECTOR3 * D3DXVec3Maximize�Q�同上）(j��) 取最大�?
D3DXVECTOR3 * D3DXVec3Minimize�Q�同上）(j��) 取最��?
D3DXVECTOR3 * D3DXVec3Scale�Q�返回指针，PV�Q�FLOAT�Q?比例
FLOAT D3DXVec3Dot(pv1,pv2) 点乘
参见�~�程�_��a(b��).chm中的COM中模块的导出函数
Private Type D3DVECTOR
    x As Single
    y As Single
    z As Single
End Type
'�q�回3D向量的规格化向量
Private Declare Function D3DXVec3Normalize Lib "DX8VB.DLL" Alias "VB_D3DXVec3Normalize" (VOut As D3DVECTOR, v As D3DVECTOR) As Long
Private Declare Function D3DXVec3Add Lib "DX8VB.DLL" Alias "VB_D3DXVec3Add" (VOut As D3DVECTOR, v1 As D3DVECTOR, V2 As D3DVECTOR) As Long
Private Declare Function D3DXVec3Subtract Lib "DX8VB.DLL" Alias "VB_D3DXVec3Subtract" (VOut As D3DVECTOR, v1 As D3DVECTOR, V2 As D3DVECTOR) As Long
Private Declare Function D3DXVec3Length Lib "DX8VB.DLL" Alias "VB_D3DXVec3Length" (v As D3DVECTOR) As Single
D3DFVF 自由��点的格�?
D3DFVF_DIFFUSE 包含漫反射的信息
D3DFVF_NORMAL 包含法线信息
D3DFVF_PSIZE ��点信息指明�l�制点的大小
D3DFVF_SPECULAR 包含镜面反射的信�?
D3DFVF_XYZ 包含未经转换的顶点坐�?
D3DFVF_XYZRHW 包含�l�过转换的顶点坐�?
D3DFVF_XYZB1 through D3DFVF_XYZB5 包含用于骨骼动画的顶点和顶点对骨骼的权重信息
D3DFVF_XYZW 包含�l�过转换和裁剪的��点坐标
D3DTRANSFORMSTATETYPE 变换的类�?

D3DPRIMITIVETYPE 定义基本囑օ�
D3DPT_POINTLIST 一�l�点的集�?
D3DPT_LINELIST 一�l�线的集�?
D3DPT_LINESTRIP 首尾相连的线�D늚�集合
D3DPT_TRIANGLELIST 一�l�三角�Ş的集�?
D3DPT_TRIANGLESTRIP 首尾相连的三角�Ş�Q�有两个��点集合
D3DPT_TRIANGLEFAN   �l�成扇�Ş的一�l�三角�Ş集合
D3DPT_FORCE_DWORD 未定义的
D3DDISPLAYMODE 屏幕昄��模式
D3DFMT_UNKNOWN 未知的象素格�?
D3DFMT_R8G8B8 24位色�Q�RGB各占8�?
D3DFMT_A8R8G8B8 32位色�Q�@RGB各占8�?
D3DFMT_X8R8G8B8 32位色�Q�X��Z��?�?RGB各占8�?
D3DFMT_R5G6B5 16位色�Q�R�?�Q�G�?�Q�B�?�?
D3DFMT_X1R5G5B5 16位色�Q�保�?位，RGB各占5�?
D3DFMT_A1R5G5B5 16位色，Alpha占1位，RGB各占5位
D3DFMT_A4R4G4B4 16位色�Q�@RGB各占4�?
D3DFMT_R3G3B2 8位色�Q�R3,G3,B2�?
D3DFMT_A8 只有8位@
D3DFMT_A8R3G3B2 16位色�Q�@8�Q�R3�Q�G3�Q�B2�?
D3DFMT_X4R4G4B4 16位色
D3DFMT_A2B10G10R10 32位色�Q�@�?位，RGB�?0�?
D3DFMT_A8B8G8R8 32位色
D3DFMT_X8B8G8R8 32位色
D3DFMT_G16R16 32位色�Q�只有红和绿
D3DFMT_A2R10G10B10 32位色
D3DFMT_A16B16G16R16 64位色
D3DFMT_A8P8 8位色�Q?位表�C�半透明�Q?位表�C�颜�?
D3DFMT_P8 8位色�Q�用牙色索引��D��C?
D3DFMT_L8 8位色�Q�只表示亮度
D3DFMT_L16 16位色�Q�只表示亮度
D3DFMT_A8L8 16位色�Q?位表�C�半透明�Q?位表�C�Z��?
D3DFMT_A4L4 8位色�Q?位表�C�半透明�Q?位表�C�Z��?
D3DDEVTYPE_HAL   ��g抽象层，通过昄��g来完成图形渲染工�?
D3DDEVTYPE_NULLREF
D3DDEVTYPE_REF   参考光栅器，一般用于测试显卡不支持的D3D功能
D3DDEVTYPE_SW   用于支持第三方的软件
D3DDEVTYPE_FORCE_DWORD 扩展�?
D3DCREATE 3D�q�算的方�?
D3DCREATE_ADAPTERGROUP_DEVICE
D3DCREATE_DISABLE_DRIVER_MANAGEMENT
D3DCREATE_DISABLE_DRIVER_MANAGEMENT_EX
D3DCREATE_FPU_PRESERVE   �Ȁ�z�d��_�ֺ��点�q�算或��Q点运��异常检��，讄��该项�?x��)降低系�l�性能
D3DCREATE_HARDWARE_VERTEXPROCESSING 由D3D硬件进行顶点运算
D3DCREATE_MIXED_VERTEXPROCESSING 由�؜合方式进行顶点运��?
D3DCREATE_MULTITHREADED   支持多线�E�绘�Ӟ��讄��该项�?x��)降低系�l�性能
D3DCREATE_NOWINDOWCHANGES
D3DCREATE_PUREDEVICE   ��用D3D的GET*�Q�）(j��)函数�Q�禁止D3D使用虚拟讑֤�模拟��点�q�算
D3DCREATE_SCREENSAVER
D3DCREATE_SOFTWARE_VERTEXPROCESSING 由D3D软�g�q�行��点�q�算
D3DSWAPEFFECT 取值列�?
D3DSWAPEFFECT_DISCARD    后台�~�冲区复制到前台�Ӟ��清除后台�~�冲区内�?
D3DSWAPEFFECT_FLIP    后台�~�冲区内容复制后�Q�保持不变，有多个后台缓冲区时��?
D3DSWAPEFFECT_COPY    后台�~�冲区内容复制后�Q�保持不变，只有1个后台缓冲区时��?
D3DSWAPEFFECT_FORCE_DWORD   ��该直作�ؓ(f��)32位存储，通常不用
D3DPRESENT 屏幕反�{模式列表
D3DPRESENT_DONOTWAIT
D3DPRESENT_INTERVAL_DEFAULT 默认的同ONE
D3DPRESENT_INTERVAL_ONE 当屏�q�刷��C��ơ时前台后台�q�行交换
D3DPRESENT_INTERVAL_TWO 当屏�q�刷��C��ơ时前台后台�q�行交换
D3DPRESENT_INTERVAL_THREE 当屏�q�刷��C��ơ时前台后台�q�行交换
D3DPRESENT_INTERVAL_FOUR 当屏�q�刷新四�ơ时前台后台�q�行交换
D3DPRESENT_INTERVAL_IMMEDIATE 囑�Ş�l�制完成时立卌��行交�?
D3DPRESENT_LINEAR_CONTENT
D3DUSAGE �~�冲区属性值列�?
D3DUSAGE_AUTOGENMIPMAP
D3DUSAGE_DEPTHSTENCIL
D3DUSAGE_DMAP
D3DUSAGE_DONOTCLIP ��用裁剪�Q�表�C�顶点缓冲区中的��点不进行裁剪，当设�|�该属性时�Q�渲染状态D3DRS_CLIPPING必须设�ؓ(f��)FALSE
D3DUSAGE_DYNAMIC 使用动态内存分�?
D3DUSAGE_NPATCHES 使用��点�~�冲区绘制N-patches曲线
D3DUSAGE_POINTS 使用��点�~�冲区绘制点
D3DUSAGE_RENDERTARGET
D3DUSAGE_RTPATCHES 使用��点�~�冲区绘制曲�U?
D3DUSAGE_SOFTWAREPROCESSING 使用软�g�q�行��点�q�算�Q�否则��用硬件计��?
D3DUSAGE_WRITEONLY 只写属性，不能�q�行��L��作，讄��该属性可以提高系�l�性能
D3DPOOL �~�冲��源内存位�|�列�?
D3DPOOL_DEFAULT 默认的，��点�~�冲区尽可能存在与显存中
D3DPOOL_MANAGED 由D3D自动调度��点�~�冲区内存位�|�（昑֭�和内存）(j��)
D3DPOOL_SCRATCH ��点�~�冲��Z��于计��机的��(f��)时内存中�Q�这�U�类型的��点�~�冲��Z��能直接进行渲染，只能�q�行内存枷锁�Q�拷贝等操作
D3DPOOL_SYSTEMMEM ��点�~�冲��Z��于内存中
D3DLOCK �~�冲区加�?
D3DLOCK_DISCARD 更新整个�~�冲�?
D3DLOCK_DONOTWAIT
D3DLOCK_NO_DIRTY_UPDATE 在加锁的�q�程中系�l�进行其他操作（默认有Dirty标记)
D3DLOCK_NOOVERWRITE 保证不腹�E�缓冲区数据�Q�设�|�该属性可以立卌��回内存指针，提高�pȝ��性能
D3DLOCK_NOSYSLOCK 在加锁的�q�程中系�l�可能执行其他操�?
D3DLOCK_READONLY 讄��~�冲��Z��只读属�?
D3DXVECTOR3 向量��法
D3DXVECTOR3 u(x,y,z);
D3DXVECTOR3 v(x,y,z);
float 变量=D3DXVec3Dot(u指针,v指针) 点乘
D3DXMATRIX 矩阵
D3DXMatrixIdentity 单位矩阵
D3DXMatrixInverse 逆矩�?
D3D实现囑�Ş变换
D3DXMatrixTranslation �q�移矩阵
D3DXMatrixLookAtLH 观察矩阵
D3DXMatrixIdentity ��一个矩阵单位化
D3DXMatrixRotationY �l�Y轴�{
D3DXMatrixRotationX �l�X轴�{
D3DXMatrixRotationZ �l�Z轴�{
D3DXMatrixScaling �~�放变换
D3DXMatrixRotationAxis 围绕任意一个轴旋转
D3DXMatrixMultiply �l�合变换
D3DUSAGE �U�理使用
D3DUSAGE_AUTOGENMIPMAP 自动生成多��渐进�U�理序列�Q�该方式在资源处于D3DPOOL_SYSTEMMEM时无�?
D3DUSAGE_DEPTHSTENCIL 深度模版�~�冲区，只在资源处于D3DPOOL_default时有�?
D3DUSAGE_DMAP 该纹理是一个置换纹�?
D3DUSAGE_DONOTCLIP
D3DUSAGE_DYNAMIC
D3DUSAGE_NPATCHES
D3DUSAGE_POINTS
D3DUSAGE_RENDERTARGET 该文理是一个渲染目标缓冲区
D3DUSAGE_RTPATCHES
D3DUSAGE_SOFTWAREPROCESSING 应用坐标变换
D3DUSAGE_WRITEONLY
D3DTEXTURESTAGESTATETYPE 渲染状态类�?
D3DTSS_COLOROP   1 文理层的颜色混合方式
D3DTSS_COLORARG1 2 颜色混合的第一个参�?
D3DTSS_COLORARG2 3 颜色混合的第二个参数
D3DTSS_ALPHAOP   4 指定�U�理层的Alpha透明
D3DTSS_ALPHAARG1 5 Alpha混合的第一个参�?
D3DTSS_ALPHAARG2 6 Alpha混合的第二个参数
D3DTSS_BUMPENVMAT00 7 �l�制凹凸�U�理�?
D3DTSS_BUMPENVMAT01 8 �l�制凹凸�U�理�?
D3DTSS_BUMPENVMAT10 9 �l�制凹凸�U�理�?
D3DTSS_BUMPENVMAT11 10 �l�制凹凸�U�理�?
D3DTSS_TEXCOORDINDEX 11 该纹理层使用的纹理坐标的索引
D3DTSS_BUMPENVLSCALE 22 �l�制凹凸�U�理的羃攑֏��?
D3DTSS_BUMPENVLOFFSET   23 �l�制凹凸�U�理的��^�U�d��?
D3DTSS_TEXTURETRANSFORMFLAGS 24 控制�U�理坐标的�{换标�?
D3DTSS_COLORARG0 26 指定混合�q�程的第三个颜色
D3DTSS_ALPHAARG0 27 Alpha混合的第三个参数
D3DTSS_RESULTARG 28 颜色混合的结果输出寄存器
D3DTSS_CONSTANT 32 颜色混合的常量寄存器
D3DTSS_FORCE_DWORD 0x7fffffff 强制转换�?2位，通常不用
D3DSAMPLERSTATETYPE �U�理采样属�?
D3DSAMP_ADDRESSU 1 包装�U�理
D3DSAMP_ADDRESSV 2 包装�U�理
D3DSAMP_ADDRESSW 3 包装�U�理
D3DSAMP_BORDERCOLOR 4
D3DSAMP_MAGFILTER 5 处理攑֤��q��o(h��)
D3DSAMP_MINFILTER 6 处理�~�小�q��o(h��)
D3DSAMP_MIPFILTER 7 多纹理过�?
D3DSAMP_MIPMAPLODBIAS 8 多��文理�U�数偏移��|��初试直�ؓ(f��)0
D3DSAMP_MAXMIPLEVEL 9 最大多�U�理�U�别�Q�初试��gؓ(f��)0
D3DSAMP_MAXANISOTROPY 10 各向异性，初试��gؓ(f��)1
D3DSAMP_SRGBTEXTURE 11
D3DSAMP_ELEMENTINDEX   12
D3DSAMP_DMAPOFFSET 13
D3DSAMP_FORCE_DWORD 0x7fffffff 强制转换32位，通常不用
�U�理��d��
D3DTADDRESS_WRAP 1 包装�U�理��d��
D3DTADDRESS_MIRROR 2 镜像�U�理��d��
D3DTADDRESS_CLAMP 3 夹取�U�理��d��
D3DTADDRESS_BORDER 4 �Ҏ(gu��)��颜色�U�理��d��
D3DTADDRESS_MIRRORONCE 5 一�ơ镜像纹理寻址
D3DTADDRESS_FORCE_DWORD 0x7fffffff强制转换32位，通常不用
世界变换
D3DTS_WORLD 世界变换

RedLight 2009-09-17 15:55 发表评论

D3d9的一些更�?(�?

RedLight — Sat, 12 Sep 2009 07:40:00 GMT

�׃��Aug 8造成的D3D9恐惧症已�l�完全消除了(ji��n)�Q�这一章将�?x��)给大家介绍�?D引擎转向D3D9的各个方面，包括�l�于出现的全屏幕模式。从�q�章以后�Q�我��用D3D9作�ؓ(f��)讲解的语�a��l�箋D2D教程�?br>
【OP�l�束�Q�开始正片�?br>
『Why�Q��?br>
　　估计大家首先要问的就�?#8220;Why�Q?#8221;��Z��么要前进到D3D9�Q�理由如下：(x��)
1、D3D9修复�?ji��n)D3D8已知的所有Bug�Q�因此运行�v来更�E�_��Q�速度也要快�?br>2、D3D9提供�?ji��n)许多便利的新功能，虽然�l�大多数是面�?D的，但是也有不少2D适用的，比如IDirect3DDevice9::StretchRect�Q�以�?qi��ng)对IDirect3DSurface9的改�q�等�{�。D3DX库就更多�?ji��n)，比如D3DXSaveSurfaceToFileInMemory�Q�一开始没发现�q�个函数有啥用处�Q�现在基本离不开�?ji��n)�?br>3、HLSL。就像上一话我说的那样�Q�D2D教程以后�?x��)有PixelShader的内宏V��我可不��x��汇编来写Shader�Q�会(x��)��M�h的（��贺我吧�Q�终于抛弃汇�~�Shader�?#8230;…�Q�。虽然说�q�不是决定性的理由�Q�因��有Cg什么的�Q�不�q�我想编写显卡无关的代码�Q�因此我不去研究Cg�Q�反正和HLSL差不多）(j��)以及(qi��ng)R2VB之类�?br>4、ID3DXFont�Q�往下看你就知道�?ji��n)�?br>
《D3D的变化�?br>
『界面名�U�变化�?br>
　　一句话�Q?�Ҏ(gu��)��9��p��?br>
�?#8220;创徏”型方法的一个统一变化�?br>
　　许多Create*()�Ҏ(gu��)��Q�比如创��备、创建纹理、创建顶点缓冲等�{�，多了(ji��n)一个HANDLE* pSharedHandle参数�Q�无用，NULL之（看来微��Y原打��弄个共享句柄之�c�，不过被D3D10巨大的变化��Q云了(ji��n)�Q?br>
『创建D3D讑֤�的变化�?br>
　　D3DPRESENT_PARAMS的FullScreen_PresentationInterval变成�?ji��n)PresentationInterval�Q�也��是说即使在�H�口模式下也可以做到垂直同步来防止撕裂现象（2D的福韛_��Q�。相应的�Q�D3DSWAPEFFECT_COPY_VSYNC消失�?ji��n)，反正�q�个效果也不咋的�Q�消�׃��(ji��n)也好�?br>　　要做到垂直同步需要给PresentationInterval赋值D3DPRESENT_INTERVAL_DEFAULT或D3DPRESENT_INTERVAL_ONE。其中D3DPRESENT_INTERVAL_ONE的效果比D3DPRESENT_INTERVAL_DEFAULT好一点，不过相应的也�?x��)占用多一点点�pȝ��资源……真的只有一点点而已�Q�实在是无所谓的……
　　如果不要垂直同步�Q�想要看看实际祯速的话，D3DPRESENT_INTERVAL_IMMEDIATE�?br>　　注意在窗口模式下�Q�你只能使用�q�三�U�Present模式�Q�全屏幕模式下就可以使用别的�Q�但是要首先��(g��)��D3DCAPS9以查看显卡是否支持）(j��)。不�q�我感觉�?9�Q�的游戏来说�Q�有�q�三个就��_��?ji��n)�?br>　　另外在窗口模式下�Q�BackBufferFormat也可以设�|�成D3DFMT_UNKNOWN�Q�D3D�?x��)自动获取当前桌面的格式讑֮�成后备缓冲的格式�Q�省去GetDisplayMode。实际上�Q�窗口模式下的后备缓冲已�l�不需要和桌面格式相同�Q�你可以通过IDirect3D9::CheckDeviceFormatConversion来检查，如果�q�个讑֤�支持�q�两�U�颜色格式之间的转换�Q�就可以�l�程序的后备�~�冲讑֮�上不同的格式。我试过在桌面格式�ؓ(f��)32Bit�Q�D3DFMT_X8R8G8B8�Q�时��程序的后备�~�冲格式讄��为D3DFMT_R5G6B5�Q?6Bit�Q�，发现�?ji��n)速度提升�Q�也��是说这个设定是有意义的�?br>　　可创建的讑֤��c�d��多了(ji��n)一�U�D3DDEVTYPE_NULLREF�Q�在安装�?ji��n)D3D SDK的机子上�{�同于D3DDEYTYPE_REF�Q�在其他的机子上�Q�这�U�设备实际上没有创徏真正意义的D3D讑֤��Q�只是允�怽�创徏的纹理、表面等资源�Q�但是Render、Present�{�操作都�?x��)无效（实际上这些资源都创徏在�?ji��n)D3DPOOL_SCRATCH池里�Q�不��你讑֮�使用的是什么POOL�Q�。也��是��_(d��)��仅仅在模拟基本的�q�行而已。你可以用这个设备来�~�写一个利用D3DX函数库进行图像格式�{换的�E�序�Q�比如把一大堆不同的格式�{换成易于D3D9使用的DDS格式。因为实际上没有创徏讑֤��Q�你甚至可以�~�写成控制台的，通过GetConsoleWindow的方法获得HWND。Mercury 3用的MIF格式的�{换器��是�q�么做出来的。注意D3DDEVTYPE_NULLREF只能用在IDirect3D::CreateDevice�Ӟ��其他的方法都不行�?br>
『创��面的变化�?br>
　　创徏表面�Q�Surface�Q�的�Ҏ(gu��)��变成�?ji��n)IDirect3DDevice9::CreateOffscreenPlainSurface�Q�参数很��单不用多��_(d��)��需要注意的是可以选择POOL�?ji��n)�?br>
『设定FVF的变化�?br>
　　讑֮�FVF�Ӟ��原来通过IDirect3DDevice8::SetVertexShader�Q�现在有�?ji��n)一个专门用来设定FVF的方法：(x��)IDirect3DDevice9::SetFVF。这是个很好的变化，省得把FVF和Shader弄�؜�Q�题外话�Q�也��是因�ؓ(f��)�q�个变化�Q�让Shader在设备Reset后得以保存，不错不错�Q?br>
『获取后备缓册Ӏ?br>
　　D3D9现在允许有多个后备缓冲交换链�Q�不�q�对�?D来说�Q�基本不需要这�U�东西，IDirect3DDevice9::GetBackBuffer多出来的�W�一个参数赋�?卛_��。如果你有兴��，可以�ȝ��I�一下这个玩意，有时候可以用来做分场�?br>
�?font size=+0>SetStreamSource�?br>
　　�q�个�Ҏ(gu��)��的功能被扩展�?ji��n)，��?gu��)��参数��可以知道，多出来的OffsetInBytes允许你选择一个顶点缓冲的Offset�Q�D3D9��从�q�个Offset之后开始读取数据。因此你可以把几�l�用来渲染纹理的正方形顶点存储到一个顶点缓冲里面�?br>
�?font size=+0>SetSamplerState�?br>
　　�q�个是D3D9的新�Ҏ(gu��)��Q�把原先SetTextureStageState的一些功能独立了(ji��n)出来�Q�和2D关系最密切的就是纹理过滤了(ji��n)。原先的D3DTSS_MINFILTER变成�?ji��n)D3DSAMP_MINFILTER�Q�相应的D3DTSS_MAGFILTER也变成D3DSAMP_MAGFILTER�Q�D3DTSS_MAXANISOTROPY变成D3DSAMP_MAXANISOTROPY。另外还有更多的�Q�比如纹理寻址�{�。你�ȝ��一下D3DSAMPLERSTATETYPE枚�D�c�d��的内容就知道�?#8220;�q�移”�?ji��n)些什么�?br>　　�q�个变化对于Shader来说很方�ѝ��改成Sampler的东西在PixelShader�q�程也会(x��)有效�Q�而没有更改的东西在PixelShader��׃��?x��)有效�?ji��n)。D3D8时候把�q�些全都攑֜��?ji��n)一��P��Ҏ(gu��)��造成混�ؕ�?br>
�?font size=+0>SetRenderTarget�?br>
　　D3D9现在允许多重RenderTarget存在�Q�不�q�我们基本上只用一个，RenderTargetIndex设�ؓ(f��)0�Q�第二个参数仍然是需要设定的表面。与D3D8相同的是�Q�在讑֮�之前仍然需要先通过GetSurfaceLevel获得表面才行�?br>
『顶点缓冲的锁定�?br>
　　注意IDirect3DVertexBuffer9::Lock的第三个参数�Q�从原来的BYTE**变成�?ji��n)void**。也��是�q�样�?#8230;…

『其他的一些变化�?br>
1、CopyRects变成�?ji��n)UpdateSurface。和UpdateTexture一��P��只能从D3DPOOL_SYSTEMMEM拯��到D3DPOOL_DEFAULT
2、增加了(ji��n)一个比较有用的IDirect3DDevice9::ColorFill�Ҏ(gu��)��Q�作用是向D3DPOOL_DEFAULT的某个区域填充颜�Ԍ��和Clear的功能类��|��但是在��用目的上要比Clear明确的多�Q��ƈ且由于不牉|��深度�~�冲之类�Q�速度要快一些�?br>3、增加了(ji��n)一个IDirect3DDevice9::StretchRect�Ҏ(gu��)��Q�通过�q�个�Ҏ(gu��)��可以在D3DPOOL_DEFAULT的表面或�U�理之间�q�行带过滤器的羃放操作，免去利用Render的过�E�，非常有用。不�q�这个方法由于��用了(ji��n)��g处理�Q�限制较多，请大家仔�l�看SDK文档的Remarks部分�?br>
《D3DX的变化�?br>
　　D3DX的变化实际上相当的多�Q�但正如我一开始所��_(d��)��基本都是面向3D的。需要我们注意的有以下几�U�：(x��)
1、D3DX***FromFile之类的函数支持的囑փ�格式增加�?ji��n)，不过所增加的都是很��见的格式。��^时基本上�q�是用BMP、TGA和PNG��p��够�?br>2、增加了(ji��n)D3DXSave***ToFileInMemory�Q�将�?x��)把文�g写入内存。这个函数的作用��g��不是很容易想刎ͼ�但是如果你要写一个集成了(ji��n)转换、打包功能的工具�Q�这个就很有用了(ji��n)�Q�省��M��(ji��n)通过临时文�g操作造成的各�U�问题。另外如果你熟�?zh��n)�某种囑�Ş文�g的格式的话，�q�可以通过直接讉K��q�个文�g获得RAW信息。注意，�q�类函数写入的是一个ID3DXBuffer�Q�这个东西很��单，只有两个特定的方法，一看便懂，不再多言�?br>3、增加了(ji��n)一个ID3DXLine�Q�可以方便你�?D上画�U�，创徏ID3DXLine的方法是D3DXCreateLine。这个东西也不复杂，使用�Ҏ(gu��)��有点像ID3DXSprite�Q�稍微研�I�一下就能弄懂，注意每次Draw的是D3DPT_LINESTRIP。用它比直接用顶点缓冲的好处是可以方便的打开反锯齿，效果�?#8230;…基本满意�?br>4、增加了(ji��n)一个ID3DXRenderToSurface�Q?#8220;理论上来�?#8221;方便�?ji��n)利用RenderTarget的过�E?#8230;…不过我感觉反而弄得复杂了(ji��n)。创建的�Ҏ(gu��)��是D3DXCreateRenderToSurface�Q�有�?j��)情的朋友自��q��I�看看吧�Q�我��׃��讲了(ji��n)�?br>
　　ID3DXSprite和ID3DXFont在Summer 2004的DX9 SDK�Q�也��是�W�一版DX9.0c�Q�开始发生了(ji��n)很大变化�Q�下面详�q�ͼ�(x��)

『ID3DXSprite�?br>
　　你会(x��)发现ID3DXSprite::DrawTransform不见�?ji��n)，取而代之的是其功能被整合到ID3DXSprite::SetTransform里面�Q�也��是说�ؓ(f��)�?ji��n)羃攑֒�旋�{�Q�我们不得不和矩阉|��交道�?ji��n)。其实也不会(x��)太复杂，因�ؓ(f��)我们只是做一些矩阵运��，学过�U�性代数的朋友肯定�?x��)很熟�(zh��n)��Q�就��你不怎么熟�?zh��n)��U�性代敎ͼ�也没关系�Q�D3DX函数库提供了(ji��n)现成的矩阵运��函敎ͼ�你只要用��p��?ji��n)�?br>
D3DXMatrixScaling
D3DXMatrixRotationZ
D3DXMatrixTranslation

　　按照��序调用�q�三个函�?#8230;…或许学过3D的马上就惛_��q�点�?ji��n)，的确是没错啦。注意顺序哦�Q�Scaling -> Rotation -> Translation�Q�简�U�SRT�Q�看�q�全金属狂潮吗？看过的话�q�个单词很好记吧^_^�Q�，弄错�?ji��n)可是得不到正确�l�果的�?br>　　你是不是惛_��把同一个D3DXMATRIX当作参数使用三次�Q�错啦！你要用矩阵乘法。创��Z��个D3DXMATRIX�Q�比如mat1、mat2、mat3�Q�分别用�q�三个函数将其创��Zؓ(f��)�~�放矩阵、旋转矩阵和�q�移矩阵�Q�然后在ID3DXSprite::SetTransform�Ӟ��q�样写：(x��)

SetTransform(&(mat1 * mat2 * mat3));

　　有够�ȝ��(ch��)的是不？ID3DXSprite方便�?ji��n)�?D的，可害苦了(ji��n)�?D的，所以我已经不直接用�q�个�?ji��n)（什么叫不直接用�Q�往下看�Q��?br>
『ID3DXFont�?br>
　　大家来欢呼吧�Q�Summer 2004改进的ID3DXFont��d��枪毙掉了(ji��n)上一话那个字体引�?#8230;…
　　�q�东西的改进�Q�怎么说呢�Q�应该说是改头换面吧�Q�速度、效果都和以前不是一个数量��。可怜的PixelFont�Q�才存在�?ji��n)一话就要被抛弃�?ji��n)�?br>　　ID3DXFont多出来的几个�Ҏ(gu��)��Q�Preload*()�q�类的，��是把一些常用的字的字模提前��d��到内存里面加快速度�Q�同时还可以使用ID3DXSprite渲染�Q�进一步加快速度。虽然内部仍然有GDI的部分，不过很明昑ַ�作方式发生了(ji��n)极大的变化。根据我的估计，�q�次的ID3DXFont很聪明的利用GDI获得文字的轮廓，然后通过�U�理来渲染。这��L(f��ng)��速度��快得多�?ji��n)，而且文字质量也得��C��(ji��n)很好的控�Ӟ��基本和直接用GDI的质量相同了(ji��n)�?br>　　�׃��PreloadCharacters()和PreloadGlyphs()不是那么好理解，一般用PreloadText()��p��。徏议将所有ASCII字符、标点符号和部分汉字预读�q�去。这个预读过�E�略微有�Ҏ(gu��)��Q�而且�Ҏ(gu��)��预读的文字数量和你创建文字的字号�Q�占用的内存也不同。这里给大家一堆文字，你Copy�q�去��p��Q?br>

引用

const char strPreloadText[] = " 1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM~!@#$%^&*()-=[]\\;',./_+{}|:\"<>?　、�?#183;ˉˇ¨〃—～�?#8230;‘’“”〔〕〈〉《》「」『』〖〗【】！�Q�＃�K�％�Q�＇�Q�）(j��)�Q�＋�Q�－�Q�／�Q�１�Q�３�Q�５�Q�７�Q�９�Q�；�Q�＝�Q�？�Q�Ａ�Q��E�Q��I�Q�Ｇ�Q�ﾃ�Q��O�Q��݋�Q�Ｏ�Q�ͼ��QԌ��Q�_(d��)��QӞ�D�Q�Ｙ�Q��E�Q�|��Q�＿｀ａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ�?j��ng)｜｝我人有的和��M�不�ؓ(f��)�q�工要在�W�一上是中国�l�已发了(ji��n)民同";

　　注意�W�一个字�W�是�I�格哦！把空格预读进��d��是很重要的^_^
　　看上��dƈ不多�Q�因��考虑到内存占用及(qi��ng)速度�Q�我只预��M��(ji��n)一些符号和五笔的一键字。这些字�W�在24号字时候已�l�占用了(ji��n)�?MB�?ji��n)，比�vPixelFont字库占用的要大得多。天知道ID3DXFont到底预读�?ji��n)些什�?#8230;…
　　PreloadText()的第二个参数不要用strlen�Q�sizeof(strPreloadText)卛_��?br>　　然后��是利用ID3DXSprite来渲染。注意ID3DXFont::DrawText的第一个参数就是LPD3DXSPRITE�Q�因此如果要利用ID3DXSprite�Q�要��ID3DXFont::DrawText攑ֈ�ID3DXSprite::Begin和ID3DXSprite::End之间。这��是我刚才说的不直接用ID3DXSprite的意思，ID3DXFont�?x��)完成ID3DXSprite的全部调用，你不用担�?j��)�?br>　　另外你应该注意到ID3DXSprite::Begin增加�?ji��n)参敎ͼ�实际上DX文档里面没说�Q�但是示例里面有�Q�如果想让ID3DXSprite发挥作用�q�且最大幅度的提升效率�Q�参��C��讑֮�D3DXSPRITE_ALPHABLEND | D3DXSPRITE_SORT_TEXTURE卛_��。意思很明白�Q�打开Alpha�q��o(h��)和纹理筛选。这里DX文档上有个错误一直没改：(x��)文档里给出的是D3DXSprite__SORT_TEXTURE�Q�但是你可以试试�Q�绝�Ҏ(gu��)��错�?br>　　剩下的就没啥�?ji��n)，ID3DXFont的��用方法上一话已�l�讲�q�。要注意的是D3DXCreateFont和D3DXCreateFontIndirect都发生了(ji��n)变化。D3DXCreateFont已经不再牉|��GDI�?ji��n)，D3DXCreateFontIndirect所使用的结构也变成�?ji��n)D3DXFONT_DESC�Q�相对于LOGFONT�l�构�Q�除��M��(ji��n)一些用不着的参敎ͼ�增加�?ji��n)一个MipLevels�Q�就是MipMap�{��啦，不用多说�Q?D下只�?。其他的上一话都有。实际上�׃��D3DXCreateFont已经不再兌��GDI�Q�D3DXCreateFontIndirect的存在仅仅是�׃��历史原因�Q��ؓ(f��)�?ji��n)兼容像我这�U��h的��用习(f��n)惯）(j��)�Q�大家还是用D3DXCreateFont吧，省事�?br>　　截图��׃��贴了(ji��n)�Q�没啥意义。你可能觉得直接向后备缓冲上DrawText�q�不够好看，那么��先��d��一张纹理上�Q�然后将�U�理错位渲染到后备缓冲�ƈ且打开�U�型�q��o(h��)�Q�就可以辑ֈ�和PixelFont相同的效果了(ji��n)�?br>　　速度�?#8230;…我画�?ji��n)整整一屏幕字，在不�~�冲文字的情况下�Q�这�?#8220;�~�冲文字”和ID3DXFont的文字缓冲可不是一回事啊！看过上一话的都应该知道我�q�里指的是什么）(j��)�Q�速度仍然�?20FPS以上。或�怽��?x��)觉得速度�q�是有点慢，但是�Q�如果用D3D8的ID3DXFont��M��q�么一屏幕�Q�基本就只剩20FPS�?ji��n)�?br>　　使用ID3DXFont替换掉PixelFont的优势就是可以方便的自定义字体字号了(ji��n)�Q��ƈ且也不再受GB2312字库的限制。所以大安��换了(ji��n)�?#8230;…都换�?ji��n)�?#8230;…把PixelFont忘了(ji��n)�?#8230;…

『稳定的DX9 SDK版本�?br>
　　我现在用的是April 2006�Q�而且应该�?x��)用很长旉��。August 2006我是肯定不会(x��)�ȝ��啦！即��我不再恐惧D3D9�Q�也�?x��)对�q�个SDK避让三分的。其实对�?D�Q�我感觉用到April 2006��p��够了(ji��n)�Q�之后的DX9 SDK主要在D3DX�?D函数库部分进行更�?#8230;…其实也是�U�后的蚂��p��达不�?ji��n)几天，D3D10马上��p��出来�?ji��n)。要说D3D10�?#8230;…你还是看我另外一��日志好�?ji��n)，��M��打死我都不拿它做2D�?br>
　　实际上仅仅是2D的话�Q�从D3D8转向D3D9�q�没有多��变化，主要是稳定嘛�Q�只要你不调用一些D3D9专用的功能，即��拿D3D9来做2D�Q�在�l�大多数昑֍�上还是能够运行的。嗯……GF2�{��以上吧，GF2之前的，也太老了(ji��n)�Q�无视好�?ji��n)�?br>
《再上点菜好�?ji��n)�?x��)全屏�q�模式�?br>
　　其实�q�不是多么复杂的问题�Q�让我拖�?ji��n)这么�?#8230;…不拖�?ji��n)，�q�里��教�l�大家如何做全屏�q�模式以�?qi��ng)如何处理设备丢��q��问题�?br>
『创建全屏幕模式�?br>
　　D3DPRESENT_PARAMS里面�Q�W(xu��)indowed讑֮�为false�Q��ƈ且一定要讑֮�BackBufferWidth和BackBufferHeight�Q�完毕�?br>　　哈哈�Q�就�q�么��单，或许早就有�h��试�q�了(ji��n)�Q�但是你试试按下Alt+Tab�Q�再切换回去�Q�保证你什么都看不到�?br>　　之前曄��说过�Q�DX8之前的版本，在全屏幕下工作比在窗口下�Ҏ(gu��)��Q�到DX8之后��则完全颠倒过来。因为在�H�口模式下不用担�?j��)设备丢失（除非你更��?gu��)��面分辨率�Q�，全屏�q�模式下��׃��(x��)有这个问题了(ji��n)。下面详�q�ͼ�(x��)

『设备、资源丢失�?br>
　　讑֤�丢失�?x��)发生在全屏�q�模式下切换回桌面时�Q�不论是通过Alt+Tab�q�是QQ上有人给你发�?ji��n)张囄��?_-bbb�Q�，而且如果在调用IDirect3DDevice9::Reset�Q�从现在开始就是D3D9�?ji��n)啊�Q�忘记D3D8�?#8230;…�Q�的时候发生错误，讑֤�也会(x��)丢失�?br>　　讑֤�丢失�?x��)造成资源丢失�Q�所有创建在D3DPOOL_DEFAULT池的资源都会(x��)丢失�Q�需要重新创建，其内容当然也�?x��)消失，需要重写�?br>　　然而创建在D3DPOOL_SYSTEMMEM和D3DPOOL_SCRATCH池的资源不会(x��)受到影响。创建在D3DPOOL_MANAGED池的资源也不�?x��)丢失，而且在设备重新可用的时候，D3DPOOL_MANAGED池的资源也可以立��x��入��用，内容也不�?x��)改变。看�q�个池名字：(x��)托管池就能知道，D3D帮你处理�?ji��n)所有问题�?br>　　因此避免讑֤�丢失后资源丢��q��易方法就是将所有资源创建在D3DPOOL_MANAGED池内。不�q�这�q�不是个好方法，�q�意味着不能用渲染对象——记得吗�Q�RenderTarget只能创徏在D3DPOOL_DEFAULT。实际上最好的�Ҏ(gu��)��是跟�t�所有D3DPOOL_DEFAULT资源�Q�比如利用std::list�Q�将所有D3DPOOL_DEFAULT资源勾住�Q�在讑֤�发生丢失的时候释放掉资源�Q�设备可以��l��用的时候重新创��源，记得把数据写回去。对于其他的池就不用�q�么折腾�?ji��n)�?br>
『当讑֤�丢失之后�?br>
　　不论通过��M��方式发生�?ji��n)设备丢失，所有的操作几乎都会(x��)失效�Q�只有Release()可以用——其实D3D�?x��)保证有部分操作可以成功�Q�但是也仅仅�?#8220;可以”成功而不�?#8220;一�?#8221;成功�Q�所以你�q�不如认定丢��q��时候全都会(x��)��p�|比较好——以�?qi��ng)IDirect3DDevice9::TestCooperativeLevel。因此在讑֤�丢失之后�Q�你应该停止整个游戏循环�Q�而通过反复调用IDirect3DDevice9::TestCooperativeLevel判断讑֤�是否可用�?br>
『IDirect3DDevice9::TestCooperativeLevel�?br>
　　�q�个�Ҏ(gu��)��(g��)��当前的讑֤�状态。返回值有四种�Q�D3D_OK一切正常，D3DERR_DEVICELOST讑֤�丢失�Q�D3DERR_DEVICENOTRESET讑֤�可以Reset。另外还有D3D9新增的D3DERR_DRIVERINTERNALERROR�Q�遇到这个你��完蛋了(ji��n)�Q�基本不可能恢复�?ji��n)，�l�止�E�序吧�?br>　　按照��序来讲�Q�如果游戏在正常�q�行�Q�D3D_OK�?x��)返回；如果发生了(ji��n)设备丢失�ƈ且在�q�个时候不能恢复，比如全屏�q�模式的时候用户切换到�?ji��n)Windows桌面�Q�就�?x��)返回D3DERR_DEVICELOST�Q�如果用户又切换回了(ji��n)游戏�Q�设备可以恢复了(ji��n)�Q�还没恢复呢�Q�只�?#8220;可以”恢复而已�Q�，��׃��(x��)�q�回D3DERR_DEVICENOTRESET�?br>　　另外�Q�IDirect3DDevice9::Present也会(x��)�q�回�c�M��的��|��不过你最好别指望�q�个�Q�老老实实的用TestCooperativeLevel。因为Present在设备可以恢复的时候还是返回D3DERR_DEVICELOST�Q�外一句：(x��)D3D10的时候TestCooperativeLevel��׃��(x��)完全整合到Present里面�?ji��n)，可喜可贺可喜可贺�Q?br>
『处理设备丢失�?br>
　　看下面的伪代码：(x��)

// Pseudocode: evaluate the device state once per pass of the main loop.
switch (IDirect3DDevice9::TestCooperativeLevel()){
    case D3D_OK:
        // Device is usable: run a normal frame.
        GameLoop();
        break;
    case D3DERR_DEVICELOST:
        // Device is lost and cannot be reset yet: do nothing this pass.
        break;
    case D3DERR_DEVICENOTRESET:
        // Device can be reset now: release resources, reset, then recreate.
        OnLostDevice();
        IDirect3DDevice9::Reset();
        OnResetDevice();
        break;
    default:
        // Any other result is treated as unrecoverable.
        QuitGame();
        break;
}

　　GameLoop()��是你的游戏�q�行的过�E�了(ji��n)。把�q�个switch写在我们游戏框架的GameMain()部分�Q�具体的位置可以看�Q何一话附带的源代码�?br>　　好像我一直没有讲IDirect3DDevice9::Reset的参数啊�Q�因为只有一个参敎ͼ��是指向D3DPRESENT_PARAMS的指针。把你第一�ơ创��备时使用的D3DPRESENT_PARAMS�l�构保存��h��Q�供Reset来用�?br>　　OnLostDevice()��是Release掉所有D3DPOOL_DEFAULT的资源，OnResetDevice()��是Create*()恢复啦！你可能注意到ID3DXFont、ID3DXSprite�{�等都有同名的方法，��是在这个时候调用的。如果你没有�q�么做，也就是说�q�保留着��M��D3DPOOL_DEFAULT的资源的话，IDirect3DDevice9::Reset��׃��定会(x��)��p�|�?br>　　另外在OnResetDevice里面你还要重新进�?font size=+0>SetRenderState�?font size=+0>SetSamplerState�{�等�Q�Reset之后�q�些东西也丢�׃��(ji��n)。实际上Reset和重新创��Z��ơ设备类��|��所不同的是重新创徏讑֤�的话你需要连D3DPOOL_MANAGED的资源也Release掉。这个话题就不讨��Z��(ji��n)�?br>　　从代码可以看出来�Q�D3DERR_DEVICELOST时程序什么都没做�Q�只是在�?c��)��。我认�ؓ(f��)�q�是一个好�?f��n)惯�Q�因为实在不能保证在D3DERR_DEVICELOST旉��?ji��n)Release�q�能�q�什么，与其�q�样�q�不如等讑֤�能用�?ji��n)再说�?br>
　　实在懒得��资源的话，全部D3DPOOL_MANAGED好了(ji��n)。至于渲染对象？自己惛_��法�?br>
『�h工制�?#8220;讑֤�丢失”�?br>
　　“�q�嘛�q�要刉��设备丢失啊�Q?#8221;如果更改游戏分��L率、色深、切换全屏幕�?qi��ng)窗口状态，�q�行�q�样的操作也要通过Reset�Q�同��L(f��ng)��Q�Reset之前也要释放掉所有D3DPOOL_DEFAULT资源�Q�其实严格来��_(d��)��q�有更多的资源也要释放，不过�?D下基本不�?x��)创��c�资源，你就不用��了(ji��n)�Q��ƈ且调用ID3DXSprite::OnLostDevice之类的方法。这��是人工刉��?#8220;讑֤�丢失”�?ji��n)。实际上在这个过�E�设备�ƈ没有真正的丢失，只是�?x��)有一�D�|��间处于不可用的状态，此时Reset��未�q�回�Q�整个D3D讑֤��好像死�?ji��n)一栗��D个例子，你切换桌面分辨率�Q�会(x��)有那么一�D�|��间显�C�器上什么都不显�C�，然后很快��正�怺�(ji��n)。和�q�个现象是同一个原因。Reset成功后记得恢复资源�?br>　　你可能注意到�q�里的Reset和上面的Reset不是一回事。的��是�q�样�Q�这里是��Z��(ji��n)重设状态而不是恢复设备。因此更改分辨率、色��q��Reset需要写到switch外面�Q�也��是别和它搅和的意�?_-bb。而且你只需要OnLostDevice -> Reset -> OnResetDevice。记住：(x��)正确的调用Reset不会(x��)造成讑֤�丢失�Q�这个概念别弄�؜�?ji��n)�?br>
『切换全屏幕模式时的注意事项�?br>
　　注意WindowStyle的变化。切换成全屏�q�模式后�Q�只能��用WS_POPUP�Q�不然显�C�Z��(x��)变得怪怪的�Q�你可以通过SetWindowLongPtr函数更改�H�口外观�Q�第二个参数指定GWL_STYLE卛_��。别忘了(ji��n)WS_VISIBLE啊！不然你什么都看不见�?br>
『更详细的文档�?br>
　　我这里只是简单讨��Z��(ji��n)造成讑֤�丢失的原因及(qi��ng)处理�Ҏ(gu��)��Q�更详细的内容你可以参考DX SDK文档的Lost Device文章�Q��h家是权威的�?br>
【以上，正片�l�束�Q�后面是ED�?br>
　　我们前进��C��(ji��n)D3D9�Q�赶上了(ji��n)时代�?br>　　我们创徏�?ji��n)全屏幕游戏�Q�赶上了(ji��n)时代�?br>　　我却变得一脑子��糊�Q�被观众抛弃�?ji��n)�?br>　　哈哈�Q�开玩笑啦，不过�q�一话很乱倒是真的�Q�因��Z��论是更新到D3D9�q�是讑֤�丢失�Q�牵扯的东西都太散太杂，�l�果弄得�q�一话也是一盘散沙（居然又没有附带代码）(j��)。唉�Q�大家就忍了(ji��n)吧，忍不�?ji��n)的话就来PIA我吧�?br>
　　关于更新至D3D9更多的内容，你可以参考SDK文档的《Converting to Direct3D 9》�?br>
【以上，ED�l�束�Q�后面是……�?br>
　　�W�一季完�l�了(ji��n)……
　　回过头来看看�Q�从�W�一话创��Z��个Windows�H�口�Q�到�q�一话的讑֤�丢失�Q�话题的层次一直在深入�Q�现在已�l�深入到�?ji��n)不再�?#8220;学习(f��n)”而是“研究”的范围。我也不再想仅仅是搞“教学”而是惛_��大家“讨论”。不�q�第一季主要还是教学吧。能坚持着看D2D教程到现在的�Q�应该基本能够写出完整的2D Demo来了(ji��n)吧。如果有什么问题的话，�Ƣ迎提出�Q�我在看到后�?x��)立��d��{�的……只要你这个问题不太RP的话……
　　那么�Q�第二季�?x��)是什么样子？
　　�W�二季就不再是教学了(ji��n)�Q�而开始我和大家的讨论�q�程。第二季的第一话，也就是第09话，我将提供一些高�U�技巧给大家�Q��ƈ希望有兴��的朋友和我一赯��行这些技巧的研究。另外在�W�二季里面，我们�q�要创徏一�?D囑�Ş引擎。原来打��给大家讲解Medux 2�Q�不�q�现在感觉这东西实在��儿�U�，�l�对�?x��)让大家B4的。那么既然如此，�q�脆介绍Mercury 3好了(ji��n)�Q�有意见无？
　　透漏一点下一话的内容吧：(x��)模糊�_�ֺ�和多�ơ纹理渲染，嘿嘿�Q�听上去挺高��q��是不�Q�实际上��񔽎�单，��q��你能不能惛_��而已�?br>　　希望你在看完�q�一话之后，�q�回��d��把前面的内容看看�Q�相信你�?x��)得到新的收莗��搞不好你还能抓出几个Bug呢！因�ؓ(f��)我是惛_��什么写什么，没个章法�Q�Bug是难免的�?br>

附加:

Direct3D中的字体与文本显�C?br>

囑�Ş�pȝ��中�ؓ(f��)�?ji��n)获得当前运行程序的相关信息�Q�往往需要在屏幕上显�C�文本，Direct3D的功能扩展接口ID3DXFont�Ҏ(gu��)��提供�?ji��n)方便的解决��?gu��)��?

创徏ID3DXFont对象

使用接口ID3DXFont�l�制文本�Q�首先需要通过函数D3DXCreateFont()创徏ID3DXFont字体对象。ID3DXFont接口��装�?ji��n)Windows字体和Direct3D讑֤�指针�Q�D3DXCreateFont()函数通过Windows字体和Direct3D讑֤�指针创徏ID3DXFont对象�Q�该函数的声明如下：(x��)

Creates a font object for a device and font.

HRESULT D3DXCreateFont( LPDIRECT3DDEVICE9 pDevice, INT Height, UINT Width, UINT Weight, UINT MipLevels, BOOL Italic, DWORD CharSet, DWORD OutputPrecision, DWORD Quality, DWORD PitchAndFamily, LPCTSTR pFacename, LPD3DXFONT * ppFont);
Parameters
pDevice
[in] Pointer to an IDirect3DDevice9 interface, the device to be associated with the font object.
Height
[in] The height of the characters in logical units.
Width
[in] The width of the characters in logical units.
Weight
[in] Typeface weight. One example is bold.
MipLevels
[in] The number of mipmap levels.
Italic
[in] True for italic font, false otherwise.
CharSet
[in] The character set of the font.
OutputPrecision
[in] Specifies how Windows should attempt to match the desired font sizes and characteristics with actual fonts. Use OUT_TT_ONLY_PRECIS for instance, to ensure that you always get a TrueType font.
Quality
[in] Specifies how Windows should match the desired font with a real font. It applies to raster fonts only and should not affect TrueType fonts.
PitchAndFamily
[in] Pitch and family index.
pFacename
[in] String containing the typeface name. If the compiler settings require Unicode, the data type LPCTSTR resolves to LPCWSTR. Otherwise, the string data type resolves to LPCSTR. See Remarks.
ppFont
[out] Returns a pointer to an ID3DXFont interface, representing the created font object.
Return Values
If the function succeeds, the return value is S_OK. If the function fails, the return value can be one of the following: D3DERR_INVALIDCALL, D3DXERR_INVALIDDATA, E_OUTOFMEMORY.

Remarks
The creation of an ID3DXFont object requires that the device supports 32-bit color.

The compiler setting also determines the function version. If Unicode is defined, the function call resolves to D3DXCreateFontW. Otherwise, the function call resolves to D3DXCreateFontA because ANSI strings are being used.

If you want more information about font parameters, see The Logical Font.

�C�Z��代码如下�Q?/p>

D3DXCreateFont(g_device, 50, 20, 20, 0, FALSE, DEFAULT_CHARSET, 0, 0, 0, "Arial", &g_font);

RedLight 2009-09-12 15:40 发表评论

优化3D囑�Ş渲染通道负蝲(�?

RedLight — Tue, 01 Sep 2009 01:32:00 GMT

一般来��_(d��)�� 定位渲染通道瓉��的方法就是改变渲染通道每个步骤的工作量, 如果吞吐量也改变�? 那个步骤��是瓉��.。找��C��(ji��n)瓉��p��惛_��法消除瓶�? 可以减少该步骤的工作�? 增加其他步骤的工作量�?

　　一般在光栅化之前的瓉��U�C��”transform bound”, 三角形设�|�处理后的瓶颈称�?#8221;fill bound”定位瓉��的办�?

1.改变帧缓冲或者渲染目�?Render Target)的颜色深�?16 �?32 �?, 如果帧速改变了(ji��n), 那么瓉��应该在��~�冲(RenderTarget)的填充率上�?
2.否则试试改变贴图大小和脓(chu��ng)图过滤设�|? 如果帧速变�?那么瓉��应该是在贴图�q�里�?
3.否则改变分��L�?如果帧速改变了(ji��n), 那么改变一下pixel shader的指令数�? 如果帧速变�? 那么瓉��应该��是pixel shader. 否则瓉��在光栅化过�E�中�?
4.否则, 改变��点格式的大��? 如果帧速改变了(ji��n), 那么瓉��应该在显卡带宽上�?
5.如果以上都不�? 那么瓉��在CPU�q�一辏V�?
优化�Ҏ(gu��)��36�?
1.��量减少无用的顶�Ҏ(gu��)��? 比如贴图坐标, 如果有Object使用2�l�有的��?�l? 那么�?要将他们攑֜�一个vertex buffer�? �q�样可以减少传输的数据量�?
2.使用多个streamsource, 比如SkinMesh渲染, 可以把顶点坐标和法线�q�些每一帧都要修改的数据攑֜�一个动态VB�? 其它不需要修改的(如脓(chu��ng)囑֝��?攑ֈ�一个静(r��n)态VB�? �q�样��减��了(ji��n)数据传输量�?
3.��量使用16位的索引�~�冲,避免32位的. 一斚w��费带宽, 一斚w��也不是所有的昑֍�都支�?2位的索引�~�冲�?
4.可以考虑使用vertex shader来计��静(r��n)态VB中的数据.比如SkinMesh的顶点可以放到vectex shader中计��? �q�样��可以避免每一帧都从AGP内存中向昑֭�传送数�? �q�样也可以��用静(r��n)态VB�?ji��n)�?
5.坚决避免使用Draw**UP一族的函数来绘制多边�Ş�?
6.在设计程序之前好好规划一下显卡内存的使用, ��保framebuffer, 贴图, �?r��n)态VB能够正好攑օ�昑֍�的本地内存中�?
7.��量佉K��Ҏ(gu��)��式大��是32字节的倍数.可以考虑使用压羃�q�的��点格式然后用vertex shader去解. 或者留下冗余的部分, 佉K��点大��刚好��32字节的倍数�?br>
8.��点在顶点缓冲中的顺序尽量符合绘制的��序, 考虑使用strips来代替list�?
9.如果可能��量多的使用static vertex buffer代替dynamic vertex buffer�?
10.动态VB使用DISCARD参数来lock更新, 使用NOOVERWRITE来添�?��量不要使用不带参数的lock调用(0)�?
11.��量减少lock的次�? 有些东西�q�不一定非要每一帧都更新VB, 比如人物动画一般每�U�钟更新30�ơVB基本上就够了(ji��n)�?
12.如果是因为需要绘制的��点数据太多�?ji��n)可以考虑使用LOD, 但是现在的显卡的�l�制能力都很强劲, 所以需要权衡一下LOD是否能够带来相应的好�? 如果�q�分的强化LOD很可能将瓉��转移到CPU�q�边�?
13.避免�q�多的顶点计��?比如�q�多的光�? �q�于复杂的光照计��?复杂的光照模�?, �U�理自动生成的开启也�?x��)增加顶点的计算�? 如果贴图坐标变换矩阵不是单位矩阵, 也会(x��)造成��点计算量的增加, 所以如果纹理变换已�l�结�? 记得要将�U�理变换矩阵设�ؓ(f��)单位矩阵同时调整贴图坐标�?
14.避免Vertex shader指��o(h��)数量太多或者分支过�? ��量减少vertex shader的长度和复杂�E�度. ��量使用swizzling代替mov�?
15.如果图象质量斚w��的计��?pixel shader)范围很大, �q�且很复�? 可以考虑试试全屏反走栗��说不定更快�?
16.��量按照front – back的顺序来�l�制�?
17.在shader中判断Z值可以避免绘制不可见的象�? 但是nvidia��单的shader不要�q�么�?(Don't do this in a simple shader)�?
18.如果可能, ��量使用vertex shader来代替pixel shader.��计��从逐象素变成逐顶炏V�?
19.��量降低贴图的大��?�q�大的脓(chu��ng)囑֏�能造成贴图cache�q�蝲, 从而导致脓(chu��ng)图cache命中降低.�q�大的脓(chu��ng)图会(x��)��D��昑֭��q�蝲, �q�时候脓(chu��ng)图是从系�l�内存中取的�?br>
20.只要可能��q��16位色的脓(chu��ng)�? 如环境脓(chu��ng)图或者shadow map.它们�?2位色的脓(chu��ng)囑֮�在是��费�?
21.考虑使用DXT 贴图压羃�?
22.如果可能,使用��单的贴图�q��o(h��)或者mip map, 除非必要否则��量不要使用三线�q��o(h��)和各��异性过�? light map �?环境贴图基本上都不需要��用它们�?
23.只有真正需要修改的贴图才��用Dynamic, �q�且使用DISCRAD和W(xu��)RITEONLY来lock�?
24.太多的��~�冲��d��可以考虑关闭Z-Writes如有些多pass的渲染中的后�l�pass或者粒子系�l�等半透明几何物体�Q�如果可以）(j��)�?
25.可能的话��量使用alpha test代替alpha blending�?
26.如果不需要stencil buffer��尽量��?6位的Z buffer�?
27.减小RenderTarget 贴图的大��? 如shadow map 环境贴图. 可能�Ҏ(gu��)��不需要那么大效果��很好�?
28.Stencil �?Z buffer ��量一起clear. 他们本来��是一块缓册Ӏ?
29.��量减少渲染状态的切换, ��量一�ơ画��可能多的多边�Ş。（�Ҏ(gu��)��昑֍�性能军_��最多画多少�Q?不过一般再多也不会(x��)多到哪里厅R�?除非你根本不需要脓(chu��ng)囑֒�渲染状态的切换�Q��?
30.��量使用shader来代替Fixed Pipeline�?
31.��量使用shader来实现来取代Multipass渲染效果�?
32.��量优先先徏立重要的资源, 如Render target, shaders, 贴图, VB, IB�{�等.以免昑֭��q�蝲的时候它们被创徏到系�l�内存中�?
33.坚决不要在渲染��@环中调用创徏资源�?
34.按照shader和脓(chu��ng)囑ֈ��l�后再渲�?先按照shaders分组再按贴图�?
35.Color Stencil Z buffer��量在一�ơClear调用中清除�?
36.一个Vertex buffer 的大��在2M-4M之间最好�?

RedLight 2009-09-01 09:32 发表评论

OpenGL Performance Optimization(�?

RedLight — Tue, 25 Aug 2009 06:05:00 GMT

SIGGRAPH '97

Course 24: OpenGL and Window System Integration

OpenGL Performance Optimization

1. Hardware vs. Software
2. Application Organization
- 2.1 High Level Organization
- 2.2 Low Level Organization
3. OpenGL Optimization
4. Evaluation and tuning

1. Hardware vs. Software

OpenGL may be implemented by any combination of hardware and software. At the high-end, hardware may implement virtually all of OpenGL while at the low-end, OpenGL may be implemented entirely in software. In between are combination software/hardware implementations. More money buys more hardware and better performance.

Intro-level workstation hardware and the recent PC 3-D hardware typically implement point, line, and polygon rasterization in hardware but implement floating point transformations, lighting, and clipping in software. This is a good strategy since the bottleneck in 3-D rendering is usually rasterization and modern CPU's have sufficient floating point performance to handle the transformation stage.

OpenGL developers must remember that their application may be used on a wide variety of OpenGL implementations. Therefore one should consider using all possible optimizations, even those which have little return on the development system, since other systems may benefit greatly.

From this point of view it may seem wise to develop your application on a low-end system. There is a pitfall, however: some operations which are cheap in software may be expensive in hardware. The moral is: test your application on a variety of systems to be sure the performance is dependable.

2. Application Organization

At first glance it may seem that the performance of interactive OpenGL applications is dominated by the performance of OpenGL itself. This may be true in some circumstances but be aware that the organization of the application is also significant.

2.1 High Level Organization

Multiprocessing

Some graphical applications have a substantial computational component other than 3-D rendering. Virtual reality applications must compute object interactions and collisions. Scientific visualization programs must compute analysis functions and graphical representations of data.

One should consider multiprocessing in these situations. By assigning rendering and computation to different threads they may be executed in parallel on multiprocessor computers.

For many applications, supporting multiprocessing is just a matter of partitioning the render and compute operations into separate threads which share common data structures and coordinate with synchronization primitives.

SGI's Performer is an example of a high level toolkit designed for this purpose.

Image quality vs. performance

In general, one wants high-speed animation and high-quality images in an OpenGL application. If you can't have both at once a reasonable compromise may be to render at low complexity during animation and high complexity for static images.

Complexity may refer to the geometric or rendering attributes of a database. Here are a few examples.

During interactive rotation (i.e. mouse button held down) render a reduced-polygon model. When drawing a static image draw the full polygon model.
During animation, disable dithering, smooth shading, and/or texturing. Enable them for the static image.
If texturing is required, use GL_NEAREST sampling and glHint( GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST ).
During animation, disable antialiasing. Enable antialiasing for the static image.
Use coarser NURBS/evaluator tesselation during animation. Use glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ) to inspect tesselation granularity and reduce if possible.

Level of detail management and culling

Objects which are distant from the viewer may be rendered with a reduced complexity model. This strategy reduces the demands on all stages of the graphics pipeline. Toolkits such as Inventor and Performer support this feature automatically.

Objects which are entirely outside of the field of view may be culled. This type of high level cull testing can be done efficiently with bounding boxes or spheres and have a major impact on performance. Again, toolkits such as Inventor and Performer have this feature.

2.2 Low Level Organization

The objects which are rendered with OpenGL have to be stored in some sort of data structure. Some data structures are more efficient than others with respect to how quickly they can be rendered.

Basically, one wants data structures which can be traversed quickly and passed to the graphics library in an efficient manner. For example, suppose we need to render a triangle strip. The data structure which stores the list of vertices may be implemented with a linked list or an array. Clearly the array can be traversed more quickly than a linked list. The way in which a vertex is stored in the data structure is also significant. High performance hardware can process vertexes specified by a pointer more quickly than those specified by three separate parameters.

An Example

Suppose we're writing an application which involves drawing a road map. One of the components of the database is a list of cities specified with a latitude, longitude and name. The data structure describing a city may be:

	/* One entry of the road-map city database. */
	struct city {
		float latitude, longitude;	/* city location */
		char *name;			/* city's name */
		int large_flag;			/* 0 = small, 1 = large */
	};

A list of cities may be stored as an array of city structs.

Our first attempt at rendering this information may be:

	void draw_cities( int n, struct city citylist[] )
{
int i;
for (i=0; i < n; i++) {
if (citylist[i].large_flag) {
glPointSize( 4.0 );
}
else {
glPointSize( 2.0 );
}
glBegin( GL_POINTS );
glVertex2f( citylist[i].longitude, citylist[i].latitude );
glEnd();
glRasterPos2f( citylist[i].longitude, citylist[i].latitude );
glCallLists( strlen(citylist[i].name),
GL_BYTE,
citylist[i].name );
}
}

This is a poor implementation for a number of reasons:

glPointSize is called for every loop iteration.
only one point is drawn between glBegin and glEnd
the vertices aren't being specified in the most efficient manner

Here's a better implementation:

	void draw_cities( int n, struct city citylist[] )
{
int i;
/* draw small dots first */
glPointSize( 2.0 );
glBegin( GL_POINTS );
for (i=0; i < n ;i++) {
if (citylist[i].large_flag==0) {
glVertex2f( citylist[i].longitude, citylist[i].latitude );
}
}
glEnd();
/* draw large dots second */
glPointSize( 4.0 );
glBegin( GL_POINTS );
for (i=0; i < n ;i++) {
if (citylist[i].large_flag==1) {
glVertex2f( citylist[i].longitude, citylist[i].latitude );
}
}
glEnd();
/* draw city labels third */
for (i=0; i < n ;i++) {
glRasterPos2f( citylist[i].longitude, citylist[i].latitude );
glCallLists( strlen(citylist[i].name),
GL_BYTE,
citylist[i].name );
}
}

In this implementation we're only calling glPointSize twice and we're maximizing the number of vertices specified between glBegin and glEnd.

We can still do better, however. If we redesign the data structures used to represent the city information we can improve the efficiency of drawing the city points. For example:

	/* A group of cities that are all drawn with the same point size. */
	struct city_list {
		int num_cities;		/* number of cities held in this list */
		float *position;	/* lat/lon coordinates of the cities */
		char **name;		/* city names */
		float size;		/* point size used for these cities */
	};

Now cities of different sizes are stored in separate lists. Positions are stored sequentially in a dynamically allocated array. By reorganizing the data structures we've eliminated the need for a conditional inside the glBegin/glEnd loops. Also, we can render a list of cities using the GL_EXT_vertex_array extension if available, or at least use a more efficient version of glVertex and glRasterPos.

	/* indicates if server can do GL_EXT_vertex_array: */
	GLboolean varray_available;

	/*
	 * Draw every city in `list`: first the points, all at list->size,
	 * then one label per city.
	 *
	 * The points go through the GL_EXT_vertex_array path when the header
	 * defines the extension and varray_available is set; otherwise they
	 * are sent with glBegin/glEnd using the vector form of glVertex.
	 * list->position is read as num_cities consecutive pairs of floats.
	 */
	void draw_cities( struct city_list *list )
	{
		int i;
		/* Start from the glBegin/glEnd fallback so the flag is always
		 * initialised; only the vertex-array path switches it off. */
		GLboolean use_begin_end = GL_TRUE;

		/* draw the points */
		glPointSize( list->size );
	#ifdef GL_EXT_vertex_array
		if (varray_available) {
			/* NOTE(review): assumes the vertex array has already been
			 * enabled by the caller -- confirm. */
			glVertexPointerEXT( 2, GL_FLOAT, 0, list->num_cities, list->position );
			glDrawArraysEXT( GL_POINTS, 0, list->num_cities );
			use_begin_end = GL_FALSE;
		}
	#endif
		if (use_begin_end) {
			glBegin(GL_POINTS);
			for (i=0; i < list->num_cities; i++) {
				glVertex2fv( &list->position[i*2] );
			}
			glEnd();
		}

		/* draw city labels */
		for (i=0; i < list->num_cities ;i++) {
			/* the vector form takes a pointer to the coordinate pair */
			glRasterPos2fv( &list->position[i*2] );
			glCallLists( strlen(list->name[i]),
				GL_BYTE, list->name[i] );
		}
	}

As this example shows, it's better to know something about efficient rendering techniques before designing the data structures. In many cases one has to find a compromise between data structures optimized for rendering and those optimized for clarity and convenience.

In the following sections the techniques for maximizing performance, as seen above, are explained.

3. OpenGL Optimization

There are many possibilities for improving OpenGL performance. The impact of any single optimization can vary a great deal depending on the OpenGL implementation. Interestingly, items which have a large impact on software renderers may have no effect on hardware renderers, and vice versa! For example, smooth shading can be expensive in software but free in hardware, while glGet* can be cheap in software but expensive in hardware.

After each of the following techniques look for a bracketed list of symbols which relates the significance of the optimization to your OpenGL system:

H - beneficial for high-end hardware
L - beneficial for low-end hardware
S - beneficial for software implementations
all - probably beneficial for all implementations

3.1 Traversal

Traversal is the sending of data to the graphics system. Specifically, we want to minimize the time taken to specify primitives to OpenGL.

Use connected primitives: Connected primitives such as GL_LINE_STRIP, GL_LINE_LOOP, GL_TRIANGLE_STRIP, GL_TRIANGLE_FAN, and GL_QUAD_STRIP require fewer vertices to describe an object than individual line, triangle, or polygon primitives. This reduces data transfer and transformation workload. [all]

Use the vertex array extension: On some architectures function calls are somewhat expensive so replacing many glVertex/glColor/glNormal calls with the vertex array mechanism may be very beneficial. [all]

Store vertex data in consecutive memory locations: When maximum performance is needed on high-end systems it's good to store vertex data in contiguous memory to maximize throughput of data from host memory to the graphics subsystem. [H,L]

Use the vector versions of glVertex, glColor, glNormal and glTexCoord: The glVertex, glColor, etc. functions which take a pointer to their arguments such as glVertex3fv(v) may be much faster than those which take individual arguments such as glVertex3f(x,y,z) on systems with DMA-driven graphics hardware. [H,L]

Reduce quantity of primitives: Be careful not to render primitives which are over-tesselated. Experiment with the GLU primitives, for example, to determine the best compromise of image quality vs. tesselation level. Textured objects in particular may still be rendered effectively with low geometric complexity. [all]

Display lists: Use display lists to encapsulate frequently drawn objects. Display list data may be stored in the graphics subsystem rather than host memory thereby eliminating host-to-graphics data movement. Display lists are also very beneficial when rendering remotely. [all]

Don't specify unneeded per-vertex information: If lighting is disabled don't call glNormal. If texturing is disabled don't call glTexCoord, etc.

Minimize code between glBegin/glEnd

For maximum performance on high-end systems it's extremely important to send vertex data to the graphics system as fast as possible. Avoid extraneous code between glBegin/glEnd.

Example:

	glBegin( GL_TRIANGLE_STRIP );
for (i=0; i < n; i++) {
if (lighting) {
glNormal3fv( norm[i] );
}
glVertex3fv( vert[i] );
}
glEnd();

This is a very bad construct. The following is much better:

	/* The lighting flag is tested once, outside the vertex loops. */
	if (!lighting) {
		/* unlit strip: positions only */
		glBegin( GL_TRIANGLE_STRIP );
		for (i = 0; i < n; ++i)
			glVertex3fv( vert[i] );
		glEnd();
	}
	else {
		/* lit strip: a normal precedes every vertex */
		glBegin( GL_TRIANGLE_STRIP );
		for (i = 0; i < n; ++i) {
			glNormal3fv( norm[i] );
			glVertex3fv( vert[i] );
		}
		glEnd();
	}

Also consider manually unrolling important rendering loops to maximize the function call rate.

3.2 Transformation

Transformation includes the transformation of vertices from glVertex to window coordinates, clipping and lighting.

Lighting

Avoid using positional lights, i.e. light positions should be of the form (x,y,z,0) [L,S]
Avoid using spotlights. [all]
Avoid using two-sided lighting. [all]
Avoid using negative material and light color coefficients [S]
Avoid using the local viewer lighting model. [L,S]
Avoid frequent changes to the GL_SHININESS material parameter. [L,S]
Some OpenGL implementations are optimized for the case of a single light source.
Consider pre-lighting complex objects before rendering, ala radiosity. You can get the effect of lighting by specifying vertex colors instead of vertex normals. [S]

Two sided lighting: If you want both the front and back of polygons shaded the same try using two light sources instead of two-sided lighting. Position the two light sources on opposite sides of your object. That way, a polygon will always be lit correctly whether it's back or front facing. [L,S]

Disable normal vector normalization when not needed: glEnable/Disable(GL_NORMALIZE) controls whether normal vectors are scaled to unit length before lighting. If you do not use glScale you may be able to disable normalization without ill effects. Normalization is disabled by default. [L,S]

Use connected primitives: Connected primitives such as GL_LINE_STRIP, GL_LINE_LOOP, GL_TRIANGLE_STRIP, GL_TRIANGLE_FAN, and GL_QUAD_STRIP decrease traversal and transformation load.

glRect usage: If you have to draw many rectangles consider using glBegin(GL_QUADS) ... glEnd() instead. [all]

3.3 Rasterization

Rasterization is the process of generating the pixels which represent points, lines, polygons, bitmaps and the writing of those pixels to the frame buffer. Rasterization is often the bottleneck in software implementations of OpenGL.

Disable smooth shading when not needed: Smooth shading is enabled by default. Flat shading doesn't require interpolation of the four color components and is usually faster than smooth shading in software implementations. Hardware may perform flat and smooth-shaded rendering at the same rate though there's at least one case in which smooth shading is faster than flat shading (E&S Freedom). [S]

Disable depth testing when not needed: Background objects, for example, can be drawn without depth testing if they're drawn first. Foreground objects can be drawn without depth testing if they're drawn last. [L,S]

Disable dithering when not needed: This is easy to forget when developing on a high-end machine. Disabling dithering can make a big difference in software implementations of OpenGL on lower-end machines with 8 or 12-bit color buffers. Dithering is enabled by default. [S]

Use back-face culling whenever possible.: If you're drawing closed polyhedra or other objects for which back facing polygons aren't visible there's probably no point in drawing those polygons. [all]

The GL_SGI_cull_vertex extension: SGI's Cosmo GL supports a new culling extension which looks at vertex normals to try to improve the speed of culling.

Avoid extra fragment operations: Stenciling, blending, stippling, alpha testing and logic ops can all take extra time during rasterization. Be sure to disable the operations which aren't needed. [all]

Reduce the window size or screen resolution: A simple way to reduce rasterization time is to reduce the number of pixels drawn. If a smaller window or reduced display resolution are acceptable it's an easy way to improve rasterization speed. [L,S]

3.4 Texturing

Texture mapping is usually an expensive operation in both hardware and software. Only high-end graphics hardware can offer free to low-cost texturing. In any case there are several ways to maximize texture mapping performance.

Use efficient image formats: The GL_UNSIGNED_BYTE component format is typically the fastest for specifying texture images. Experiment with the internal texture formats offered by the GL_EXT_texture extension. Some formats are faster than others on some systems (16-bit texels on the Reality Engine, for example). [all]

Encapsulate texture maps in texture objects or display lists: This is especially important if you use several texture maps. By putting textures into display lists or texture objects the graphics system can manage their storage and minimize data movement between the client and graphics subsystem. [all]

Use smaller texture maps: Smaller images can be moved from host to texture memory faster than large images. More small texture can be stored simultaneously in texture memory, reducing texture memory swapping. [all]

Use simpler sampling functions: Experiment with the minification and magnification texture filters to determine which performs best while giving acceptable results. Generally, GL_NEAREST is fastest and GL_LINEAR is second fastest. [all]

Use the same sampling function for minification and magnification: If both the minification and magnification filters are GL_NEAREST or GL_LINEAR then there's no reason OpenGL has to compute the lambda value which determines whether to use minification or magnification sampling for each fragment. Avoiding the lambda calculation can be a good performance improvement.

Use a simpler texture environment function: Some texture environment modes may be faster than others. For example, the GL_DECAL or GL_REPLACE_EXT functions for 3 component textures is a simple assignment of texel samples to fragments while GL_MODULATE is a linear interpolation between texel samples and incoming fragments. [S,L]

Combine small textures: If you are using several small textures consider tiling them together as a larger texture and modify your texture coordinates to address the subtexture you want. This technique can eliminate texture bindings.

Use glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST): This hint can improve the speed of texturing when perspective- correct texture coordinate interpolation isn't needed, such as when using a glOrtho() projection.

Animated textures: If you want to use an animated texture, perhaps live video textures, don't use glTexImage2D to repeatedly change the texture. Use glTexSubImage2D or glTexCopyTexSubImage2D. These functions are standard in OpenGL 1.1 and available as extensions to 1.0.

3.5 Clearing

Clearing the color, depth, stencil and accumulation buffers can be time consuming, especially when it has to be done in software. There are a few tricks which can help.

Use glClear carefully [all]

Clear all relevant color buffers with one glClear.

Wrong:

  /* The color and stencil buffers are cleared by two separate calls. */
  glClear( GL_COLOR_BUFFER_BIT );
  if (stenciling)
      glClear( GL_STENCIL_BUFFER_BIT );

Right:

  if (stenciling) {
glClear( GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT );
}
else {
glClear( GL_COLOR_BUFFER_BIT );
}

Disable dithering: Disable dithering before clearing the color buffer. Visually, the difference between dithered and undithered clears is usually negligible.

Use scissoring to clear a smaller area: If you don't need to clear the whole buffer use glScissor() to restrict clearing to a smaller area. [L].

Don't clear the color buffer at all: If the scene you're drawing opaquely covers the entire window there is no reason to clear the color buffer.

Eliminate depth buffer clearing

If the scene you're drawing covers the entire window there is a trick which lets you omit the depth buffer clear. The idea is to only use half the depth buffer range for each frame and alternate between using GL_LESS and GL_GREATER as the depth test function.

Example:

   /* Non-zero selects the GL_LESS / [0.0, 0.5] setup for the next frame. */
   int EvenFlag;

   /*
    * One-time depth setup; must also be re-run whenever the window is
    * resized. Clears the depth buffer to 1.0 and selects the
    * GL_LESS / [0.0, 0.5] configuration.
    */
   void init_depth_buffer( void )
   {
       glClearDepth( 1.0 );
       glClear( GL_DEPTH_BUFFER_BIT );

       glDepthRange( 0.0, 0.5 );
       glDepthFunc( GL_LESS );

       EvenFlag = 1;
   }
/* Per-frame drawing function.  Each call selects one half of the depth
 * range together with the matching depth test, then flips EvenFlag so
 * that the next call selects the other half.
 */
void display_func( void )
{
    const int use_lower_half = EvenFlag;

    /* Flip the flag first; the branch below works from the saved copy. */
    EvenFlag = !use_lower_half;

    if (!use_lower_half) {
        /* Range mapped as 1.0 -> 0.5, tested with GL_GREATER. */
        glDepthRange( 1.0, 0.5 );
        glDepthFunc( GL_GREATER );
    }
    else {
        /* Range mapped as 0.0 -> 0.5, tested with GL_LESS. */
        glDepthRange( 0.0, 0.5 );
        glDepthFunc( GL_LESS );
    }

    /* draw your scene */
}

Avoid glClearDepth( d ) where d!=1.0: Some software implementations may have optimized paths for clearing the depth buffer to 1.0. [S]

3.6 Miscellaneous

Avoid "round-trip" calls: Calls such as glGetFloatv, glGetIntegerv, glIsEnabled, glGetError, glGetString require a slow, round trip transaction between the application and renderer. Especially avoid them in your main rendering code.
Note that software implementations of OpenGL may actually perform these operations faster than hardware systems. If you're developing on a low-end system be aware of this fact. [H,L]

Avoid glPushAttrib: If only a few pieces of state need to be saved and restored it's often faster to maintain the information in the client program. glPushAttrib( GL_ALL_ATTRIB_BITS ) in particular can be very expensive on hardware systems. This call may be faster in software implementations than in hardware. [H,L]

Check for GL errors during development: During development call glGetError inside your rendering/event loop to catch errors. GL errors raised during rendering can slow down rendering speed. Remove the glGetError call for production code since it's a "round trip" command and can cause delays. [all]

Use glColorMaterial instead of glMaterial: If you need to change a material property on a per vertex basis, glColorMaterial may be faster than glMaterial. [all]

glDrawPixels: glDrawPixels often performs best with GL_UNSIGNED_BYTE color components. [all] Disable all unnecessary raster operations before calling glDrawPixels. [all] Use the GL_EXT_abgr extension to specify color components in alpha, blue, green, red order on systems which were designed for IRIS GL. [H,L]

Avoid using viewports which are larger than the window: Software implementations may have to do additional clipping in this situation. [S]

Alpha planes: Don't allocate alpha planes in the color buffer if you don't need them. Specifically, they are not needed for transparency effects. Systems without hardware alpha planes may have to resort to a slow software implementation. [L,S]

Accumulation, stencil, overlay planes: Do not allocate accumulation, stencil or overlay planes if they are not needed. [all]

Be aware of the depth buffer's depth: Your OpenGL may support several different sizes of depth buffers - 16 and 24-bit for example. Shallower depth buffers may be faster than deep buffers both for software and hardware implementations. However, the precision of a 16-bit depth buffer may not be sufficient for some applications. [L,S]

Transparency may be implemented with stippling instead of blending: If you need simple transparent objects consider using polygon stippling instead of alpha blending. Stippling is typically faster and may actually look better in some situations. [L,S]

Group state changes together: Try to minimize the number of GL state changes in your code. When GL state is changed, internal state may have to be recomputed, introducing delays. [all]

Avoid using glPolygonMode: If you need to draw many polygon outlines or vertex points use glBegin with GL_POINTS, GL_LINES, GL_LINE_LOOP or GL_LINE_STRIP instead as it can be much faster. [all]

3.7 Window System Integration

Minimize calls to the make current call: The glXMakeCurrent call, for example, can be expensive on hardware systems because the context switch may involve moving a large amount of data in and out of the hardware.

Visual / pixel format performance

Some X visuals or pixel formats may be faster than others. On PCs for example, 24-bit color buffers may be slower to read/write than 12 or 8-bit buffers. There is often a tradeoff between performance and quality of frame buffer configurations. 12-bit color may not look as nice as 24-bit color. A 16-bit depth buffer won't have the precision of a 24-bit depth buffer.

The GLX_EXT_visual_rating extension can help you select visuals based on performance or quality. GLX 1.2's visual caveat attribute can tell you if a visual has a performance penalty associated with it.

It may be worthwhile to experiment with different visuals to determine if there's any advantage of one over another.

Avoid mixing OpenGL rendering with native rendering

OpenGL allows both itself and the native window system to render into the same window. For this to be done correctly synchronization is needed. The GLX glXWaitX and glXWaitGL functions serve this purpose.

Synchronization hurts performance. Therefore, if you need to render with both OpenGL and native window system calls try to group the rendering calls to minimize synchronization.

For example, if you're drawing a 3-D scene with OpenGL and displaying text with X, draw all the 3-D elements first, call glXWaitGL to synchronize, then call all the X drawing functions.

Don't redraw more than necessary

Be sure that you're not redrawing your scene unnecessarily. For example, expose/repaint events may come in batches describing separate regions of the window which must be redrawn. Since one usually redraws the whole window image with OpenGL you only need to respond to one expose/repaint event. In the case of X, look at the count field of the XExposeEvent structure. Only redraw when it is zero.

Also, when responding to mouse motion events you should skip extra motion events in the input queue. Otherwise, if you try to process every motion event and redraw your scene there will be a noticeable delay between mouse input and screen updates.

It can be a good idea to put a print statement in your redraw and event loop function so you know exactly what messages are causing your scene to be redrawn, and when.

SwapBuffer calls and graphics pipe blocking: On systems with 3-D graphics hardware the SwapBuffers call is synchronized to the monitor's vertical retrace. Input to the OpenGL command queue may be blocked until the buffer swap has completed. Therefore, don't put more OpenGL calls immediately after SwapBuffers. Instead, put application computation instructions which can overlap with the buffer swap delay.

3.8 Mesa-specific

Mesa is a free library which implements most of the OpenGL API in a compatible manner. Since it is a software library, performance depends a great deal on the host computer. There are several Mesa-specific features to be aware of which can affect performance.

Double buffering: The X driver supports two back color buffer implementations: Pixmaps and XImages. The MESA_BACK_BUFFER environment variable controls which is used. Which of the two that's faster depends on the nature of your rendering. Experiment.

X Visuals: As described above, some X visuals can be rendered into more quickly than others. The MESA_RGB_VISUAL environment variable can be used to determine the quickest visual by experimentation.

Depth buffers: Mesa may use a 16 or 32-bit depth buffer as specified in the src/config.h configuration file. 16-bit depth buffers are faster but may not offer the precision needed for all applications.

Flat-shaded primitives

If one is drawing a number of flat-shaded primitives all of the same color the glColor command should be put before the glBegin call.

Don't do this:

	glBegin(...);
glColor(...);
glVertex(...);
...
glEnd();

Do this:

	glColor(...);
glBegin(...);
glVertex(...);
...
glEnd();

glColor*() commands: The glColor[34]ub[v] are the fastest versions of the glColor command.

Avoid double precision valued functions: Mesa does all internal floating point computations in single precision floating point. API functions which take double precision floating point values must convert them to single precision. This can be expensive in the case of glVertex, glNormal, etc.

4. Evaluation and Tuning

To maximize the performance of an OpenGL application one must be able to evaluate the application to learn what is limiting its speed. Because of the hardware involved it's not sufficient to use ordinary profiling tools. Several different aspects of the graphics system must be evaluated.

Performance evaluation is a large subject and only the basics are covered here. For more information see "OpenGL on Silicon Graphics Systems".

4.1 Pipeline tuning

The graphics system can be divided into three subsystems for the purpose of performance evaluation:

CPU subsystem - application code which drives the graphics subsystem
Geometry subsystem - transformation of vertices, lighting, and clipping
Rasterization subsystem - drawing filled polygons, line segments and per-pixel processing

At any given time, one of these stages will be the bottleneck. The bottleneck must be reduced to improve performance. The strategy is to isolate each subsystem in turn and evaluate changes in performance. For example, by decreasing the workload of the CPU subsystem one can determine if the CPU or graphics system is limiting performance.

After bottlenecks have been identified the techniques outlined in section 3 can be applied. The process of identifying and reducing bottlenecks should be repeated until no further improvements can be made or your minimum performance threshold has been met.

4.2 Double buffering

For smooth animation one must maintain a high, constant frame rate. Double buffering has an important effect on this. Suppose your application needs to render at 60Hz but is only getting 30Hz. It's a mistake to think that you must reduce rendering time by 50% to achieve 60Hz. The reason is that the swap-buffers operation is synchronized to occur during the display's vertical retrace period (at 60Hz for example). It may be that your application is taking only a tiny bit too long to meet the 1/60 second rendering time limit for 60Hz.

Measure the performance of rendering in single buffer mode to determine how far you really are from your target frame rate.

4.3 Test on several implementations

The performance of OpenGL implementations varies a lot. One should measure performance and test OpenGL applications on several different systems to be sure there are no unexpected problems.

RedLight 2009-08-25 14:05 发表评论

Loading and displaying .X files without DirectX ----OpenGL渲染(�?

RedLight — Wed, 20 May 2009 12:15:00 GMT

摘要: 阅读全文

RedLight 2009-05-20 20:15 发表评论

在面试一个游戏编�E�职位前,你需要知道的东西(�?

RedLight — Sun, 04 Jan 2009 14:02:00 GMT

摘要: 阅读全文

RedLight 2009-01-04 22:02 发表评论

使用光照来表现立体感(�?

RedLight — Sat, 03 Jan 2009 05:59:00 GMT

摘要: 阅读全文

RedLight 2009-01-03 13:59 发表评论

使用混合来实现半透明效果

RedLight — Sat, 03 Jan 2009 05:45:00 GMT

摘要: 阅读全文

RedLight 2009-01-03 13:45 发表评论

RedLight — Sat, 03 Jan 2009 05:43:00 GMT

摘要: 阅读全文

RedLight 2009-01-03 13:43 发表评论

OpenGL片断��试

RedLight — Sat, 03 Jan 2009 05:42:00 GMT

摘要: 阅读全文

RedLight 2009-01-03 13:42 发表评论

Opengl�~�程低��错误 (转蝲)

RedLight — Sat, 27 Sep 2008 09:13:00 GMT

摘要: 阅读全文

RedLight 2008-09-27 17:13 发表评论

RedLight — Fri, 19 Sep 2008 03:45:00 GMT

摘要: 阅读全文

RedLight 2008-09-19 11:45 发表评论

骨骼动画解释(�?

RedLight — Fri, 19 Sep 2008 03:18:00 GMT

摘要: 阅读全文

RedLight 2008-09-19 11:18 发表评论

教你如何使用maxscript调试�?转蝲)

RedLight — Fri, 19 Sep 2008 00:33:00 GMT

教你如何使用maxscript调试�?/strong>
作�?李英�?#160;(转蝲)

　　��我个�h来说3ds max 8脚本调试器用得不多，在这里我讲一下脚本调试器的原理和��单的用法。脚本调试器只不�q�是一个max8的一个调试程序，它允许暂�?ds max�ȝ��E�和其它�U�程�Q�可以��用命令threads昄��当前�?ds max所有线�E�及(qi��ng)�U�程id�Q�可以��用setThread �U�程ID)切换当前调试的线�E�。一般情况调试器默认��试最��层(�q�个词不知道大家是否理解我的意思，��像是家里面的菜盘子�Q�最��层的盘子是最后放上去的，当然要拿开盘子只能从最上面一个开�?的线�E�，3ds max本��n在实��C��些操作时大量使用多线�E�，例如渲染�Ӟ��创��Z��(ji��n)一个新�U�程�Q�关于线�E�和�q�程的区别，你可以找相应�~�程书籍。在3ds max8的目前调试器版本�Q�还不支持鼠标选择某行讄��断点�Q�至��我没有扑ֈ��q�个功能�Q�你扑ֈ��?ji��n)请告诉我�?j��)�Q�因此我现在断定�Q�这是一个不实用的工兗��但�?ds max8已经为我们提供了(ji��n)�q�个工具�Q�当然是有用的了(ji��n)�Q�只不过�E�稍�ȝ��(ch��)�?br>　　使用脚本调试器（我假设你已经写好�?ji��n)一个脚本）(j��)�Q�首先在你的脚本你想要暂停的地方讄��断点�Q�设�|�断点可以��用break()函数�Q�也��是当脚本运行到�q�里时暂停程序。这样当你运行这个脚本时你将�?x��)在脚本调试器�?输出"�~�辑�H�口中显�C�线�E�相应的内容。（我个��为每�q�行一个脚本，3ds max��׃ؓ(f��)�q�个脚本创徏一个线�E�，所以才可以使用脚本调试器来跟踪脚本。谁能证实一下我的猜��?我的E-MAIL: liyingjang@21cn.com�Q�。当脚本执行��C��讄��断点处就停下来，�q�时你可以��用getVar ""�?setVar "" 来查看变量内容和临时讄��变量内容。更方便查看变量内容的方法可以点�?监视"按钮�Q�然后输入变量名按回车键卛_��昄��你刚输入的变量名的��g��(ji��n)。点�?�q�行"按钮可以使脚本运行到下一个断点处暂停�Q�查看和讄��变量的方法上一步所说的是一栗��?br>　　�ȝ��一下：(x��)3ds max8的目前的调试器还很不完善�Q�你也可以用丑陋来�Ş容，希望以后的Max版本能把脚本调试器做得好点，好可惜我没有看到在MAX9调试器有一点点的变化）(j��)。脚本调试器虽不怎么��P��但也不是一无是处，臛_��你可以少用print和format来查看运行时变量内容和调试脚本�?br>　　以下是我写了(ji��n)一个简单的调试脚本�Q�运行后当i=10时会(x��)启动脚本调试器�?br>global ps=10
for i=0 to 100 do
(
ps = ps + i
　　if i==10 do break()
)
查看�q�行时断点的i变量�Q�可以点�?监视"按钮�Q�输入i 然后回�R�Q�就可以看到变量i的��gؓ(f��)10�?/p>

RedLight 2008-09-19 08:33 发表评论

��Z��四叉�?w��i)空间划分的地�Ş实时渲染��?gu��)��

RedLight — Sun, 11 May 2008 13:54:00 GMT
地�Ş是计��机囑�Ş的一个重要组成部分，而它又具有特�D�的形态。地形往往覆盖面积极广�Q�且�_�ֺ�要求很高�Q��得我们必��ȝ��许多多边形来描述。这��L(f��ng)��特点使得我们不能像对待其他普通模型那样对待地形。要惛_��时地渲染地�Ş�Q�我们需要一些特�D�的�Ҏ(gu��)��?
    地�Ş渲染一直以来都是计��机囑�Ş学中一个重要的研究领域。�ƈ且在�q�一斚w��已经诞生�?ji��n)许多优�U�的算法。其中包括基于体素的渲染�Ҏ(gu��)��Q�也有基于多边�Ş的渲染方法。早期的游戏�Q�如三角�z�特�U�部队就是采用体素渲染法的成功例子。体素法�c�M��光线�q�踪渲染�Q�它从屏�q�空间出发，扑ֈ�地�Ş与屏�q�像素发出的��线交点�Q�然后确定该像素的颜艌Ӏ�这�U�方法不依赖具体的图形硬�Ӟ��整个渲染�q�程完全使用CPU处理�Q�因此它不能使用��C��g来加速，�q�且对于一个场景来��_(d��)��往往不只是地形，�q�有其他使用多边形描�q�的物体�Q�体素法渲染的图像很难与��g渲染的多边�Ş�q�行混合�Q�因此这�U�方法现在用得极��。而多边�Ş渲染�Ҏ(gu��)��则成��Z��U�主��。选择多边形来描述和渲染地形有很多的理由和优点。最重要的是它能够很好地使用��g加速，�q�且能够和其他多边�Ş对象一��L(f��ng)��一��理�?/span>

    已有大量优秀的基于多边�Ş的地形渲染算法。比较经典的��法�?/span>M. Duchaineau�{��h提出ROAM��法。这个算法采用一��三角二叉树(w��i)来描�q�整个地形。一个地形在最初的层次上由两个较大的等腰直角三角�Ş�l�成�Q�这两个�{�腰直角三角形可以被不断地细分来展现地�Ş的更多细节。每一�ơ细分过�E�都向直角三角�Ş的斜边的中点处增加一个由高程数据所描述的顶点，该点��所在的直角三角形一分�ؓ(f��)二，同时该算法也定义�?ji��n)一些规则来保证地�Ş中不�?x��)因盔R��两个三角形细节层�ơ的不同而出现裂�~�。这个算法已被许多游戏所采用。还有一�cȝ��法，通过��地形在X-Z投媄(ji��ng)面上不断地规则细分来得到不同的细节，�q�就是本文要介绍的四叉树(w��i)�I�间划分��法。另外，最新提出的一个地形算法也不得不提�Q�Hugues Hoppe�?/font>2004�q�提出的几何裁剪图方�?Geometry Clipmaps)�Q�算法��用了(ji��n)最新硬件所支持的顶点纹理来定义地�Ş的外观，�q�且对于距离摄媄(ji��ng)��Z��同远�q�的地方采用不同的纹理层�Q�最大限度地使用��g加速了(ji��n)地�Ş渲染的过�E�。这个方法听��h��非常��妙�Q�但它目前只被较?y��u)��的��g支持。因为顶点纹理是Shader Model 3.0才支持的功能�Q�也��是说只有DirectX 9.0c�U�别的显卡才能支持这�U�算法。这对于某些有普�?qi��ng)性要求的囑�Ş应用�E�序�Q�尤其是�Ҏ(gu��)��戏来讲不是一件好的事情。因此大多数人现在还在��用经典的地�Ş渲染�Ҏ(gu��)��?/span>

    首先�Q�基于四叉树(w��i)的地形渲染方法��用高�E�数据作为数据源。且��法要求高程数据的大��必��Mؓ(f��)2ⁿ+1的正方�Ş。所谓高�E�数据，卌��彩范围在0-255的灰度图片，不同的灰度代表了(ji��n)不同的高度倹{��如果某高程数据指出�q�个高程数据最高处的Y坐标值是4000�Q�那么在高程数据中一个��gؓ(f��)255的像素点��p��C��个点所代表的地形区域的高度�?000�Q�同理如果该像素值是127那么��p��C��个点所代表的地形区域的高度�?000×(127/255)=2000。高�E�数据的每个像素都对应所渲染�|�格中的一个顶炏V��另外还有一个参数描�q�顶点与��点之间的水�q��，以及(qi��ng)一个描�q�最大高度的参数。因此地形的基本数据�l�构如下�Q?/span>

    /// Basic data describing a height-mapped terrain.
    struct Terrain
    {
        /// Two-dimensional array holding the elevation (height-map) samples.
        /// NOTE(review): the accompanying text describes samples in the range
        /// 0-255; plain `char` may be signed, so reading through
        /// `unsigned char` is presumably intended - confirm.
        char **DEM;
        /// Horizontal spacing between neighbouring vertices (per the
        /// accompanying text).
        float CellSpace;
        /// Maximum terrain height (per the accompanying text).
        float HeightScale;
    };  // the terminating ';' is required after a struct definition

    其中�Q�各变量的具体意义如下图所�C�：(x��)

    有了(ji��n)�q�些参数�Q�我们可以很�Ҏ(gu��)��地由高程数据的参数值得到它所表述的多边�Ş�|�格。得到这个网��g��后，可以��单地把它攑օ��点数组�Q��ƈ��Z��建立一个顶点烦(ch��)引，��可以传入硬件进行渲染了(ji��n)。然而，事情�q�不是这么简单。对于较?y��u)��尺寸的高程数�?�?29×129)�Q�这样做��实可行�Q�但随着高程数据规模的增大，所需的顶�Ҏ(gu��)��和描�q�网格的三角形数�?x��)急剧膨胀。这个数值很快就�?x��)大到最新的昑֍�也无法接受。比如一�?025×1025的高�E�数据，我们需�?025×1025=1050625个顶点，以及(qi��ng)1050625×2=2101250个三角�Ş。就��你的显卡每�U�能够渲�?000万个三角形，你也只能得到不到5fps的渲染速度�Q�况且你的场景可能还不只包括地�Ş。因此我们必��L��办法在不影响视觉效果的情况下�~�减所渲染的三角�Ş数量�Q�另外还应该注意一�ơ性将最多的数据预先传给��g以节�U�带宽�?/span>

    �q�里要讲解的��法�Q�目的就是在不媄(ji��ng)响或在视觉可以接受的范围内羃减所渲染三角形的数量�Q�以辑ֈ�实时渲染的要求。根据测试，本算法在漫游大小�?025*1025的地形时速度�E�_��?50fps以上(在nVidia Geforce 6200 + P4 1.6GHz的硬件上得到)�?/span>

    �׃��地�Ş覆盖范围�q�，但它的投影在XZ�q�面上均匀分布(以下采用OpenGL中的��x��坐标�p�，Y轴�ؓ(f��)竖直向上的坐标��u)�Q�因此我们有必要考虑对地形进行空间划分。正是由于这��L(f��ng)��均匀分布�Q�给我们的划分过�E�带来了(ji��n)便利。我们不需要具体地��d��割某个三角�Ş�Q�只要选择那些�q�顶点且和X或Z轴垂直的�q�面作�ؓ(f��)划分面即可。例如对于一个高�E�数据，我们可以以坐标原点作为地形的中心(j��)点，然后沿着X轴和Z轴依�ơ展开来分布各个顶炏V��如下如所�C��?/span>

    首先�Q�我们可以选择X=0和Z=0�q�两个��^面，��地形划分�ؓ(f��)�{�大的四个区域，然后对划分出来的四个子区域进行递归划分�Q�每�ơ划分都选择交于区域中心(j��)点�ƈ且互相垂直的两个�q�面作�ؓ(f��)划分面，直到每个子区域都只包含一个地形单元块�Q�即两个三角形）(j��)而不能再划分为止。例如对于上图所�C?*9大小的地形块�Q�经�q�划分之后如下图所�C�：(x��)

    由图可知�Q�只有高�E�数据满��?cite style="FONT-STYLE: normal">大小2ⁿ+1的正方�Ş�q�个条�g�Q�我们才可能对地形进行均匀划分�?/cite>我们可以把划分结果用一��|��(w��i)来表�q�ͼ��׃��每次划分之后产生四个子节点，因此�q�棵�?w��i)叫四叉树(w��i)。那么，�q�棵�?w��i)中应该存储那些信息呢？首先对于每个节点�Q�应该指定这个节�Ҏ(gu��)��代表的地形的区域范围。�ƈ不是把地形网��g��实际的顶�Ҏ(gu��)��入树(w��i)中，而是要在�?w��i)中说明�q�个节点覆盖�?ji��n)地形的那些区域。比如一个子节点应该有一个Center(X,Y)变量�Q�指定这个节点的中心(j��)�Ҏ(gu��)��对应的顶点烦(ch��)引，或编受��ؓ(f��)�?ji��n)方便�v见，可以把地形中�?j��)点�~�号�?0,0)然后沿着坐标轴递增。此外还要有个变量指定这个节点到底覆盖了(ji��n)地�Ş的多��个��点。如下图所�C��?/span>

    我们目前的四叉树(w��i)的数据结构如下：(x��)

    /// One node of the terrain quadtree.
    struct QuadTreeNode
    {
        /// The four child nodes produced by one subdivision step.
        QuadTreeNode *Children[4];
        /// Index of the vertex at the centre of the region this node covers
        /// (per the accompanying text).
        int CenterX,CenterY;
        /// Extent of the region this node covers, in vertices.
        /// NOTE(review): presumably measured from the centre to an edge,
        /// going by the name - confirm.
        int HalfRange;
    };  // the terminating ';' is required after a struct definition

    有了(ji��n)四叉�?w��i)之后，如何利用它的优势呢？首先我们考虑��单的视见体裁�?View Frustum Culling�Q�以下简�U�VFC)。相信很多接触过基本囑�Ş优化的�h都应该熟�(zh��n)�VFC�Q�VFC的作用既是对那些明显位于可见�q�x��头体之外的多边�Ş在把它们传给昑֍�之前剔除掉。这个过�E�由CPU来完成。虽然简单，但它却非常有效。VFC�q�程如下�Q?/span>

    1.为每个节点计��包围球。包围球可以��单的以中�?j��)顶点��?f��)球心(j��)�Q�最大坐标值点(节点所覆盖的所有顶点的最大X、Y、Z��g��为此点的坐标�?到球�?j��)的距离为半径�?/span>

    2.�Ҏ(gu��)��当前的投影和变换矩阵计算此时可视�q�x��头体的六个��^面方�E�。这一步可以参考Azure的Blog上的一��文章，�q�篇文章�l�出�?ji��n)VFC的具体代码�?a target=_blank>单击�q�里�?/span>

    3.从树(w��i)的根�l�点以深度优先的��序遍历�?w��i)。每�ơ访问节�Ҏ(gu��)��Q�测试该节点包围球与视见体的�怺�情况。在下面的情况下�Q�包围球与视见体�怺��Q?/span>

        1) 球心(j��)在六个��^面所包围的凸状区域内部�?br>        2) 球心(j��)在六个��^面所包围的凸状区域外部，但球�?j��)到某个�q�面的距��d��于半径�?/span>

    4.如果�怺��试昄��包围球和视见体存在交集，�l�箋递归遍历此节点的4个子节点�Q�如果此节点已经是叶节点�Q�则�q�个节点应被�l�制。如果不存在交集�Q�放弃这个节点，对于�q�个节点的所有子节点不再递归��(g��)查。因为如果一个节点不可见�Q�那么其子节点一定不可见�?/span>

    �q�样�Q�我们剔除了(ji��n)那些不在视见体内的地形区域，节约�?ji��n)一些资源。但�q�还不够。在某些情况下，VFC可能�q�会(x��)指出整个地�Ş都可见，在这�U�情况下�Q�将�q�么多三角�Ş都画出显然是不可取的�?/span>

    因此�q�要考虑地�Ş的细节层��?LOD)。我们应该考虑刎ͼ�地�Ş不可能所有部分都一样��^坦或陡峭。对于��^坦的部分�Q�我们用�q�多的三角�Ş��L��q�是没有意义的。而对于�v伏程度较大的区域�Q�只有较多的三角形数量才不让人感到尖锐的��p��。再者，无论地�Ş起伏�E�度如何�Q�那些距��视点很�q�的区域�Q�也没有必要��p��太多的资源去渲染�Q�毕竟它们投影到屏幕上的面积很小�Q�对其进行简化也是必要的�?/span>

    既然我们要对起伏�E�度不同的区域采用不同的�l�节�U�别�Q�我们首先必��L��C��U�描�q�地形�v伏程度的量。与其说起伏�E�度�Q�不如说是地形的某个��点因�ؓ(f��)被简化后而��生的误差。要计算�q�个误差�Q�我们先要了(ji��n)解地形是如何被简化的�?/span>

    考虑下图所�C�的地�Ş块，它的渲染�l�果如下囑֏�图所�C��?/span>

   现在如果要对所需渲染的三角�Ş�q�行��化，我们可以考虑�q�个地�Ş块每条边中间的顶�?下图左侧�U�色�?�Q?/span>

   如果��这些红色的��点剔除�Q�我们可以得��C��囑֏��Ҏ(gu��)��C�的��化后的网根{��误差就在这一步��生。由于红色的��点被剔除后�Q�原本由�U�色��点所表示的地形高度现在变成了(ji��n)两侧黑色��点插值后的高度。这个高度就是误差。如下图�?/span>

    因此�Q�对于每个节点，我们先计��这个节�Ҏ(gu��)��有边中点被删除后所造成的误差，分别��Cؓ(f��)ΔH1, ΔH2, ΔH3, ΔH4。如果这个节点包含子节点�Q�递归计算子节点的误差�Q��ƈ把四个子节点的误差记�?#916;Hs1, ΔHs2, ΔHs3, ΔHs4。这个节点的误差��是�q�八个误差��g��的最大倹{��由于这是一个递归的过�E�，因此应该把这个过�E�加到四叉树(w��i)的生成过�E�中�Q��ƈ向四叉树(w��i)的数据结构中加入一个误差变量。如下�?/span>

    /// Quadtree node extended with the simplification error used for
    /// level-of-detail selection.
    struct QuadTreeNode
    {
        /// The four child nodes; an array of pointers so that
        /// `node->Children[i]` yields a `QuadTreeNode *`, as the traversal
        /// code that recurses on `node->Children[i]` requires.
        QuadTreeNode *Children[4];
        /// Index of the vertex at the centre of the region this node covers
        /// (per the accompanying text).
        int CenterX,CenterY;
        /// Extent of the region this node covers, in vertices.
        /// NOTE(review): presumably measured from the centre to an edge,
        /// going by the name - confirm.
        int HalfRange;
        /// Error value of this node.
        float DeltaH;
    };  // closing brace restored on its own line; ';' terminates the definition

    下面来看一下地形的具体渲染�q�程�?/span>

    首先�Q�我们位于四叉树(w��i)的根�l�点。我们此时考虑根结点的误差�Q�如果这个误差小于一个阈��|��直接使用根结点的中心(j��)点以�?qi��ng)此节点的四个边角点作��?f��)��点渲染一个三角扇形，�q�个三角扇�Ş��是渲染出来的地形。但是更�l�常的情况下�Q�根�l�点的误差值是很大的，因此��法认�ؓ(f��)要对根结点进行细分，以展现更多细节。于是对于根�l�点的每个子节点�Q�重复这个步骤，��x��查它的误差值是否大于阈��|��如果大于�Q�直接渲染这个节点，如果��于�Q�递归�l�分节点。目前我们的��法伪代码如下�?/span>

    // Pseudo-code: draw the terrain by descending the quadtree from `node`.
    // A node whose error DeltaH exceeds the threshold k is subdivided (all
    // four children are visited); any other node is drawn as it stands.
    // `k`, `i` and `GraphicsAPI` are not declared in this snippet.
    // NOTE(review): no leaf / null-child check is visible here - confirm that
    // leaf nodes can never exceed k.
    procedure DrawTerrain(QuadTreeNode *node)
    {
      if (node->DeltaH > k)
      {
           for (i=0;i<4;i++)
           {
                DrawTerrain(node->Children[i]);// recurse: subdivide into the child
           }
      }
      else
      {
           GraphicsAPI->DrawPrimitive(node);// draw a triangle fan from the node's centre point and its four corner points
      }
    }

    �q�个伪代码在一个较高的层次上表�q�C��(ji��n)��法的基本思想。然而我们还有许多问题要考虑。其一是目前我们仅仅考虑�?ji��n)地形的�l�节层次和地形表面�v伏程度的关系�Q�但�q�应该考虑地�Ş块距��视点远�q�跟地�Ş�l�节层次的关�p�R��解册��个问题很��单，我们只需在伪代码的条件中加入距离�q�一因素卛_��。即�?/span>

        if (node->DeltaH > k)
        {
            ...
        }
        else ...

    改�ؓ(f��)�Q?/span>

        if (node->DeltaH / d > k)
        {
            ...
        }
        else ...

    其中d��点中�?j��)点与视点之间的距离。而事实上�Q�当�l�节�E�度与距��ȝ��q�x��成反比时�Q�能够减��更多的三角形，而且视觉效果更好�Q�只要阈值k讄��得当�Q�根本感觉不出地形因��点的�U�d��而发生几何�Ş变。因此，我们最�l�的条�g式�ؓ(f��)�Q?/span>

    node->DeltaH / d² > k

    �q�有一个很重要的问题，��是�q�个��法所产生的地形会(x��)因�ؓ(f��)节点之间�l�节层次的不同而��生裂�~�。下图说明了(ji��n)裂缝的��生原因�?/span>

    有两个方法可以解册��个问题，一个方法是删除左侧节点中��生裂�~�的��点�Q��两条边能够重合。另一�U�方法是��Zؓ(f��)地在右侧地�Ş块中插入一条边�Q�这条边�q�接中心(j��)点和造成裂缝的顶点，从而消除裂�~�。在渲染地�Ş�Ӟ��可以采取下面的办法避免裂�~�的产生�Q?/span>

    1.在预处理阶段�Q��ؓ(f��)所有顶点创��Z��个标记数�l�，标记以该��点��Z��?j��)点的节点在某一帧是否被�l�分。如果被�l�分则标��Cؓ(f��)1�Q�否则标�?�?/span>

    2.从根节点开始，以广度优先的��序遍历四叉�?w��i)，使用之前提出的条件式判断节点是否需要分剌Ӏ�如果公式表明需要分�Ԍ��q�且与节点相�?c��)��四个节点的中心(j��)点都被标记�?�Q�那么把�q�个节点�?qi��ng)其四个子节点的标记设��?f��)1�Q��ƈ递归�l�分�q�个节点。否则，��这个节点的标记设�ؓ(f��)1�Q�把�q�个节点的四个子节点的标记设�?�Q�然后采用下面的�Ҏ(gu��)��l�制�q�个地�Ş块：(x��)

        1)��节点的中心(j��)��点和四个边角点��d��到即��绘制的三角扇�Ş列表中�?br>        2)依次��(g��)查与四条边相�?c��)��节点的标记数�l�，如果相应的标��Cؓ(f��)1�Q�那么将该点��d��C��角扇形的��点列表中，否则跌��该点�?br>        3)�l�制三角扇�Ş�?/span>

    我们最�l�的伪代码如下�?/span>

// Pseudo-code: true when the node's bounding sphere intersects the view
// frustum.  The `in frustum` test is not spelled out in this snippet.
bool IsNodeInFrustum(QuadTreeNode *node)

{

   return (node->BoudingSphere in frustum);

}

// Pseudo-code: true when all four neighbours of `node` carry the flag
// value 1.  How neighbours are located is not shown in this snippet.
bool NeighbourIsValid(QuadTreeNode *node)

{

   return (all four neighbours of node are identified as 1)

}

// Pseudo-code: level-by-level (breadth-first) traversal of the quadtree.
// `current` holds the nodes of the level being processed, `next` collects
// the nodes for the following level, and `draw` collects the nodes that are
// handed to the graphics API in one call at the end.
// `root`, `k`, `distance`, `MaxResolution`, `SetFlag` and `SwapList` are not
// defined in this snippet.
void RenderTerrain()

{

   // NOTE(review): presumably "list<QuadTreeNode*> next,current,draw;" - the
   // template argument looks as if it was stripped as markup; confirm.
   listnext,current,draw;

   int level =0;
   current.push_back(root);
   while (current.size()!=0)

   {

      for each thisNode in current

      {
         // Skip nodes that fail the frustum test; their children are never queued.
         if (!IsNodeInFrustum(thisNode))
            continue;
         // At the deepest level there is nothing left to subdivide: draw as is.
         if (level == MaxResolution)
            draw.push_back(thisNode);
         else

         // Subdivide only when the distance-scaled error exceeds k AND all four
         // neighbours are flagged 1.
         // NOTE(review): `distance` is presumably the distance from the
         // viewpoint to this node's centre - confirm.
         if (thisNode->DeltaH/(distance*distance) > k

             && NeighbourIsValid(thisNode) )

         {

             SetFlag(thisNode,1);

             // NOTE(review): this loop runs j = 1..4, whereas the earlier
             // DrawTerrain pseudo-code indexes Children with 0..3 - confirm
             // the intended index base.
             for j= 1 to 4

             {

                next.push_back(thisNode->Children[j]);

                SetFlag(thisNode->Children[j],1)

             }

         }

         else

         {

            SetFlag(thisNode,1);

            // NOTE(review): this branch queues the four children for drawing
            // and flags them 0; the accompanying description reads as drawing
            // this node itself - confirm which is intended.
            for j= 1 to 4

             {

                draw.push_back(thisNode->Children[j]);

                SetFlag(thisNode->Children[j],0);

             }

         }

      }

      // The nodes queued for the next level become the current level.
      SwapList(current,next);
      next.clear();

      level++;

   }

   GraphicsAPI->DrawPrimitives(draw);

}

    另外�Q�一个重要的优化是利用硬件的�~�冲区或��点数组(对于不支持顶点缓冲的��g而言)。因为地形无论怎样��化，��点数据��L��固定不变的。我们在每一帧动态��生的仅仅是顶点烦(ch��)引，因此我们有必要实现将地�Ş的所有顶�Ҏ(gu��)��据输入到��点�~�冲中，然后在渲染时一�ơ性将所有的索引传给昑֍��Q�以提高速度。实验表明，使用��点�~�冲比直接��用glBegin/glEnd�l�制囑�Ş要快5倍以上�?/span>

    以上讲述�?ji��n)如何做到实时地渲染大型地�Ş。主要应用了(ji��n)LOD和VFC两种手段来精��三角形数量。然而VFC只能剔除不在视见体内的图形，而对于在视见体内但被其他更近的物体遮挡的情况却无能�ؓ(f��)力。如果要实现地�Ş的自遮挡剔除�Q�地�q�线��法是一个好的选择。然而当你的场景不仅仅是包含地�Ş�Ӟ��地��^�U�算法也只能处理地�Ş的自遮挡情况。因为地�q�线��法只对2.5D的地�?卛_��XZ�q�面上无重合投媄(ji��ng)的场�?有效。对于完�?D场景�Q�地�q�线�q�不能很好的工作。所以当你在引擎中��用地形时�Q�可以考虑��地形分块后攑օ�场景的管理树(w��i)中，如BSP或Octree�{�。然后根据引擎的性质使用入口(Portal)、PVS或者遮挡测�?Occlusion Culling)�{�方法进行遮挡剔除。值得��的是�Q�遮挡测试是一个非常灵�zȝ��实时的剔除算法，且无需��M��预计��过�E�。但要想有效的实现它�q�不是一件容易的事。我曑ְ�地�Ş分块后��用遮挡剔除来完成地�Ş的自遮挡�Q�但是渲染速度不但没有提升�Q�反而有��d��的下降。因此如果要使用遮挡剔除的话必须和引擎结合�v来统一�q�行遮挡��试�Q�才有可能提高效率�?/span>

    现在你应该了(ji��n)解了(ji��n)基本的地形实时渲染方法。要惌��地�Ş的外观更加真实，我们�q�需要更多的工作。我们需要�ؓ(f��)地�Ş加上�U�理贴图和光照。首先考虑地�Ş的光照。由于地形的多边形网格是实时产生的，它会(x��)随着视点的移动而变化，因此如果你直接��用OpenGL内置的顶点光照，你会(x��)得到极度不稳定的光照效果。你�?x��)看到地形表面�?x��)因�ؓ(f��)你的�U�d��而不断蟩动。因此我们必��M��用其他的光照�Ҏ(gu��)��来避免这个问题。我们想��C��(ji��n)光照贴图。光照脓(chu��ng)图是一个游戏中常用的光照技术。它是一个覆盖了(ji��n)场景中所有多边�Ş的脓(chu��ng)图。通过�l�脓(chu��ng)图赋��|��我们可以得到多边形表面复杂的光照效果。��用好的算法计��出来的光照贴图可以模拟极度逼真的光影效果。它�l�我们带来的视觉享受�q�远地超�q�了(ji��n)OpenGL的内�|�光照。有兛_��照脓(chu��ng)囄��计算可以参考我��译的一��文章：(x��)辐射度算�?Radiosity)



   你可以简单地为地形覆盖上单一的纹理，�q�看��h��些许增加�?ji��n)地形的真实性：(x��)

    在上图中�Q�我们创��Z��(ji��n)一个地形，�q�运用了(ji��n)一个重复的�U�理。这个过�E�让地�Ş的无论哪一个区域看��h��都是一��L(f��ng)��Q�例如都是草圎ͼ�(j��)。这昄��不太真实�Q�也�q�于乏味。或�怽��?x��)创��Z��(ji��n)一�q�超大的囄��Q�以拉��覆盖的方式映��到地�Ş表面。这样做的后果是内存开销�q�于庞大�Q�这样做也很�?x��)受到硬件的限制。因此我们应该��用一�U�更好的�U�理贴图方式�Q�纹理烦(ch��)引脓(chu��ng)图�?/span>

    �U�理索引贴图对三个可重复的纹理进行烦(ch��)引脓(chu��ng)图。所谓烦(ch��)引脓(chu��ng)图，��是对三个可重复�U�理�q�行索引�Q�以军_��地�Ş的哪些区域需要��用哪些纹理的混合来脓(chu��ng)图。因为对于�Q意的贴图�Q�都�׃��l�包�?个颜色通道�Q�即R、G、B�Q�的像素�l�成。用于烦(ch��)引的贴图的像素�ƈ不表�C�地形的某个区域的具体颜�Ԍ��而是表示地�Ş的某个区域用何种具体的纹理脓(chu��ng)图。因为具体的�U�理�l�节存储在这三个可重复的�U�理中，因此索引贴图的脓(chu��ng)图方式也为拉伸到地�Ş表面�Q�但它的分��L率可以大大降低�?/span>

    �U�理索引贴图的工作方式如下：(x��)对于地�Ş投媄(ji��ng)到屏�q�上的像素，查找该像素所映射到烦(ch��)引脓(chu��ng)图上的像素。然后根据这一像素R、G、B分量的不同，军_��R、G、B分量所代表的具体纹理脓(chu��ng)囄��混合因子。根据这个�؜合因子�؜合三个可重复贴图后，��؜合得到的最�l�颜色��D��出到屏幕上�?/span>

    例如�Q��o(h��)索引贴图的R分量代表沙�W的纹理，G分量代表草地�Q�B分量代表岩石。如果烦(ch��)引脓(chu��ng)图上一个像素的值是(0,255,0)�Q�即�l�色�Q�则�q�个像素所对应的地形区域的具体�U�理��׃ؓ(f��)草地。如果该像素颜色值是(127,127,0)�Q�即黄色�Q�则该像素所对应的地形区域的�U�理��地和沙�W的�؜合，看�v来既有草�Q�又有沙。又如下图显�C�Z��(ji��n)一个样本烦(ch��)引脓(chu��ng)图，以及(qi��ng)使用该脓(chu��ng)囄��(ch��)引纹理之后的渲染效果�?/span>

索引贴图�Q�R=沙�W�Q�G=草地,B=岩石�Q?/span>

渲染效果

    原理很简单，下面讲解一下具体的实现�q�程。首先，我们准备4个纹理，其中1个纹理烦(ch��)引脓(chu��ng)图，它将被拉伸覆盖整个地形，然后3张细节脓(chu��ng)图，�q�将它们�l�定到相应的�U�理通道上。然后��用Vertex Shader为每个顶点自动计��烦(ch��)引脓(chu��ng)囄��U�理坐标�Q�在Fragment Shader里，对烦(ch��)引脓(chu��ng)图进行纹理查找，使用查找得到的颜色值的RGB颜色信息混合3张细节脓(chu��ng)图，得到当前像素的颜艌Ӏ�最后还应该把这个颜色和光照贴图中的值相乘，得到最�l�的�l�果。下面是相关的Shader代码�Q��用GLSL�~�写�?/span>

Vertex Shader:
// Scale factor applied to the vertex x/z position to build texture
// coordinate set 2.
uniform float TexInc;

void main()
{
    // Clip-space position from the fixed-function transform.
    gl_Position = ftransform();

    // Set 0: pass the application-supplied coordinates through unchanged.
    gl_TexCoord[0] = gl_MultiTexCoord0;

    // Set 2: x/z of the vertex position, scaled by TexInc.
    vec2 groundXZ = gl_Vertex.xz;
    gl_TexCoord[2] = vec4(groundXZ, 0.0, 0.0) * TexInc;

    // Set 6: the untransformed vertex position.
    gl_TexCoord[6] = gl_Vertex;
}

Fragment Shader:
uniform sampler2D IndexMap;
uniform sampler2D LightMap;
uniform sampler2D texR,texG,texB,texA;
// Blend the three detail textures using the index map's R, G and B values
// as weights, then modulate the result by the light map.
void main()
{
    vec4 idx,lm,r,g,b,color;

    // Index map and light map are both sampled with texture coordinate set 0.
    idx = texture2D(IndexMap,gl_TexCoord[0].xy); // index map
    lm = texture2D(LightMap,gl_TexCoord[0].xy);  // light map

    // Detail textures are sampled with texture coordinate set 2.
    r = texture2D(texR,gl_TexCoord[2].xy);   // texture for the R channel
    g = texture2D(texG,gl_TexCoord[2].xy);   // texture for the G channel
    b = texture2D(texB,gl_TexCoord[2].xy);   // texture for the B channel

    // Each statement sits on its own line: when a `//` comment is followed by
    // more code on the same line, that code is commented out and the
    // variables it should assign (here lm and r) are used uninitialised.
    color = lm*(idx.x*r + idx.y*g+idx.z*b); // weighted blend, then lighting
    gl_FragColor = color;
}

    最后，如果你对本文有不解之处，�Ƣ迎和我共同讨论�?/span>

RedLight 2008-05-11 21:54 发表评论

RedLight — Sun, 11 May 2008 13:32:00 GMT
     摘要:   阅读全文

RedLight 2008-05-11 21:32 发表评论

投媄(ji��ng)矩阵的实��C��?qi��ng)如何从投�?ji��ng)矩阵中获取各视裁体��^�?教程)

RedLight — Wed, 16 Apr 2008 09:49:00 GMT
     摘要:   阅读全文

RedLight 2008-04-16 17:49 发表评论

DirectX 9的坐标系�l�变�?

RedLight — Wed, 16 Apr 2008 09:45:00 GMT
     摘要:   阅读全文

RedLight 2008-04-16 17:45 发表评论

Direct3D中实现图元的鼠标拑֏�

RedLight — Wed, 16 Apr 2008 09:44:00 GMT
     摘要:   阅读全文

RedLight 2008-04-16 17:44 发表评论


索引贴图�Q�R=沙�W�Q�G=草地,B=岩石�Q?/span>	渲染效果

国产精品一区一区,久久久综合香蕉尹人综合网,久久久久久久999精品视频

BigWorld引擎初识大观

D3D与OpenGL常用API对译

D3D常用API

D3d9的一些更�?(�?

优化3D囑�Ş渲染通道负蝲(�?

OpenGL Performance Optimization(�?

SIGGRAPH '97

Course 24: OpenGL and Window System Integration

OpenGL Performance Optimization

Contents

Multiprocessing

Image quality vs. performance

Level of detail management and culling

An Example

3.2 Transformation

3.3 Rasterization

3.4 Texturing

3.5 Clearing

3.6 Miscellaneous

3.7 Window System Integration

3.8 Mesa-specific

4. Evaluation and Tuning

4.1.1 CPU subsystem

4.1.2 Geometry subsystem

4.1.3 Rasterization subsystem

Loading and displaying .X files without DirectX ----OpenGL渲染(�?

在面试一个游戏编�E�职位前,你需要知道的东西(�?

使用光照来表现立体感(�?

使用混合来实现半透明效果

OpenGL片断���试

Opengl�~�程低��错误 (转蝲)

骨骼动画解释(�?

教你如何使用maxscript调试�?转蝲)

��Z��四叉�?w��i)空间划分的地�Ş实时渲染��?gu��)��

投媄(ji��ng)矩阵的实��C���?qi��ng)如何从投�?ji��ng)矩阵中获取各视裁体��^�?教程)

DirectX 9的坐标系�l�变�?

Direct3D中实现图元的鼠标拑֏�

OpenGL片断��试

投媄(ji��ng)矩阵的实��C��?qi��ng)如何从投�?ji��ng)矩阵中获取各视裁体��^�?教程)