This is the version that uses hardware instancing -- it's very fast on my 3-year-old laptop, approx. 500 fps with vsync turned off. Edit: I got about 30 fps with the 4096 explicit batches, and those batches involved about as few state changes as you can get, as each one just changed a couple of uniforms and reissued the draw.
Code: Select all
#define _POSIX_C_SOURCE 199309
#include <cstdlib>
#include <cstdio>
#include <ctime>
#include <cassert>
#include <sstream>
#ifdef WIN32
#include <windows.h>
#endif
#include <OgreRoot.h>
#include "OgreWindowEventUtilities.h"
#include <OgreVector3.h>
#include <OgreQuaternion.h>
#include <OgreRenderSystem.h>
#include <OgreGLRenderSystem.h>
#ifdef WIN32
# include <OgreD3D9RenderSystem.h>
#endif
#include <OgreRenderWindow.h>
#include <OgreHighLevelGpuProgramManager.h>
#include <OgreGpuProgramParams.h>
#include <OgreGpuProgram.h>
#include <OgreHardwarePixelBuffer.h>
// Becomes true once a window-close notification has been received; the
// rendering loop in main() polls this flag to decide when to stop.
bool clicked_close = false;

// Window listener whose only job is to record a close notification in
// clicked_close. The single instance `wel` is registered on the render
// window in main().
struct WindowEventListener : Ogre::WindowEventListener {
    // The window pointer is not needed, so the parameter is left unnamed.
    void windowClosed (Ogre::RenderWindow *)
    {
        clicked_close = true;
    }
} wel;
/**
 * Suspend the calling thread for roughly `micros` microseconds.
 *
 * Non-positive durations return immediately without sleeping.
 *
 * On WIN32 the request is rounded UP to whole milliseconds (the granularity
 * of Sleep()), so a short but non-zero request never degenerates to Sleep(0).
 *
 * Elsewhere nanosleep() is used. The duration is split into whole seconds and
 * a sub-second remainder because nanosleep() rejects tv_nsec values outside
 * [0, 999999999]; putting everything into tv_nsec made any request of one
 * second or more fail with EINVAL (and could overflow a 32-bit long).
 * A failing nanosleep() is reported via perror("sleep") and otherwise ignored.
 */
void mysleep(long micros)
{
    if (micros <= 0) return;
#ifdef WIN32
    // round up without risking overflow near LONG_MAX
    long millis = micros / 1000 + ((micros % 1000) != 0 ? 1 : 0);
    Sleep(millis);
#else
    struct timespec t;
    t.tv_sec = micros / 1000000;            // whole seconds
    t.tv_nsec = (micros % 1000000) * 1000;  // remainder, always < 1e9
    int r = nanosleep(&t, NULL);
    if (r) {
        perror("sleep");
    }
#endif
}
/**
 * Entry point: renders num_cubes spinning cubes with a single hardware-instanced
 * draw call, driving the Ogre render system directly (no scene manager).
 *
 * Steps, in order:
 *   1. set up logging, the render system (D3D9 or GL) and the window;
 *   2. build the per-vertex cube buffer, the per-instance buffer (rotation +
 *      position per cube) and the index buffer;
 *   3. generate an 8-bit mono texture procedurally;
 *   4. compile the GLSL vertex/fragment programs and set their parameters;
 *   5. loop until the window is closed: advance each cube's rotation, upload the
 *      instance buffer, issue one instanced draw, and print the frame rate.
 *
 * Returns EXIT_SUCCESS once the window has been closed, or EXIT_FAILURE if an
 * Ogre::Exception was caught (its description is written to std::cerr).
 */
int main (void)
{
    try {

        // Specify how much logging we want and where we want it to go
        Ogre::LogManager *lmgr = OGRE_NEW Ogre::LogManager();
        Ogre::Log *ogre_log = OGRE_NEW Ogre::Log("",true,true);
        lmgr->setDefaultLog(ogre_log);
        lmgr->setLogDetail(Ogre::LL_NORMAL);

        // Default to d3d9 on windows unless env var used
#ifdef WIN32
        bool use_d3d9 = getenv("OGRE_DIRECT_GL")==NULL;
#else
        bool use_d3d9 = false;
#endif

        // use_d3d9 can only be true on WIN32, so one of the two branches below always assigns rs
        Ogre::RenderSystem *rs = NULL;
        if (use_d3d9) {
#ifdef WIN32
            rs = OGRE_NEW Ogre::D3D9RenderSystem();
            rs->setConfigOption("Allow NVPerfHUD","Yes");
            rs->setConfigOption("Floating-point mode","Consistent");
            rs->setConfigOption("Video Mode","800 x 600 @ 32-bit colour");
#endif
        } else {
            rs = OGRE_NEW Ogre::GLRenderSystem();
            rs->setConfigOption("RTT Preferred Mode","FBO");
            rs->setConfigOption("Video Mode","800 x 600");
        }
        rs->setConfigOption("Full Screen","No");

        // Necessary to use Root because it is the only class allowed to _setPrimary on a window
        Ogre::Root *ogre_root = OGRE_NEW Ogre::Root("","","");
        ogre_root->setRenderSystem(rs);
        Ogre::RenderWindow *ogre_win = ogre_root->initialise(true, "Ogre Direct Window");
        ogre_win->setDeactivateOnFocusChange(false);
        ogre_win->setVSyncEnabled(true);
        Ogre::WindowEventUtilities::addWindowEventListener(ogre_win, &wel);

        // Viewports are mandatory when rendering, but use them here in a minimal fashion
        Ogre::Viewport *ogre_vp = ogre_win->addViewport(NULL, 1, 0,0,1,1);

        // each cube face has 4 vertexes, 2 triangles
        // we do not share vertexes between adjacent faces because we want sharp normals / colour transitions
        const unsigned vertexes = 6 * 4;
        const unsigned triangles = 6 * 2;
        const unsigned num_cubes = 4096;
        const int num_cols = 64; // cubes per row

        // Prepare vertex buffer
        Ogre::VertexData vdata;
        vdata.vertexStart = 0;
        vdata.vertexCount = vertexes;

        // Non-instanced data (buffer source 0): position, uv, normal, colour
        unsigned vdecl_size = 0;
        vdecl_size += vdata.vertexDeclaration->addElement(0, vdecl_size, Ogre::VET_FLOAT3, Ogre::VES_POSITION).getSize();
        vdecl_size += vdata.vertexDeclaration->addElement(0, vdecl_size, Ogre::VET_FLOAT2, Ogre::VES_TEXTURE_COORDINATES, 0).getSize();
        vdecl_size += vdata.vertexDeclaration->addElement(0, vdecl_size, Ogre::VET_FLOAT3, Ogre::VES_NORMAL).getSize();
        vdecl_size += vdata.vertexDeclaration->addElement(0, vdecl_size, Ogre::VET_COLOUR, Ogre::VES_DIFFUSE).getSize();
        struct VDataRaw { float p[3], uv[2]; Ogre::Vector3 n; uint32_t col; }; // careful with padding here, all prims are 4 bytes long
        // The array below is uploaded as vdecl_size bytes per vertex, so the C++ layout
        // must match the declaration exactly (no padding, single-precision Ogre::Real).
        assert(sizeof(VDataRaw) == vdecl_size);
        Ogre::HardwareVertexBufferSharedPtr vbuf =
            Ogre::HardwareBufferManager::getSingleton().createVertexBuffer(
                vdecl_size, vertexes, Ogre::HardwareBuffer::HBU_DYNAMIC_WRITE_ONLY);
        vdata.vertexBufferBinding->setBinding(0, vbuf);

        // VET_COLOUR requires conversion by rendersystem to appropriate format
        uint32_t col_up; rs->convertColourValue(Ogre::ColourValue(1.0f, 1.0f, 1.0f), &col_up); // white
        uint32_t col_down; rs->convertColourValue(Ogre::ColourValue(1.0f, 1.0f, 0.5f), &col_down); // yellow
        Ogre::Vector3 norm_up(0,0, 1);
        Ogre::Vector3 norm_down(0,0,-1);
        uint32_t col_east; rs->convertColourValue(Ogre::ColourValue(1.0f, 0.5f, 0.5f), &col_east); // red
        uint32_t col_west; rs->convertColourValue(Ogre::ColourValue(1.0f, 0.75f, 0.5f), &col_west); // orange
        Ogre::Vector3 norm_east( 1,0,0);
        Ogre::Vector3 norm_west(-1,0,0);
        uint32_t col_north; rs->convertColourValue(Ogre::ColourValue(0.5f, 0.5f, 1.0f), &col_north); // blue
        uint32_t col_south; rs->convertColourValue(Ogre::ColourValue(0.5f, 1.0f, 0.5f), &col_south); // green
        Ogre::Vector3 norm_north(0, 1,0);
        Ogre::Vector3 norm_south(0,-1,0);

        // 4 vertexes per face, in the order: up, down, west, east, north, south
        VDataRaw vdata_raw[] = {
            { { -1, -1, 1 }, { 0, 0 }, norm_up, col_up },
            { { 1, -1, 1 }, { 1, 0 }, norm_up, col_up },
            { { -1, 1, 1 }, { 0, 1 }, norm_up, col_up },
            { { 1, 1, 1 }, { 1, 1 }, norm_up, col_up },
            { { -1, 1, -1 }, { 0, 0 }, norm_down, col_down },
            { { 1, 1, -1 }, { 1, 0 }, norm_down, col_down },
            { { -1, -1, -1 }, { 0, 1 }, norm_down, col_down },
            { { 1, -1, -1 }, { 1, 1 }, norm_down, col_down },
            { { -1, -1, -1 }, { 0, 0 }, norm_west, col_west },
            { { -1, -1, 1 }, { 1, 0 }, norm_west, col_west },
            { { -1, 1, -1 }, { 0, 1 }, norm_west, col_west },
            { { -1, 1, 1 }, { 1, 1 }, norm_west, col_west },
            { { 1, 1, -1 }, { 0, 0 }, norm_east, col_east },
            { { 1, 1, 1 }, { 1, 0 }, norm_east, col_east },
            { { 1, -1, -1 }, { 0, 1 }, norm_east, col_east },
            { { 1, -1, 1 }, { 1, 1 }, norm_east, col_east },
            { { -1, 1, -1 }, { 0, 0 }, norm_north, col_north },
            { { -1, 1, 1 }, { 1, 0 }, norm_north, col_north },
            { { 1, 1, -1 }, { 0, 1 }, norm_north, col_north },
            { { 1, 1, 1 }, { 1, 1 }, norm_north, col_north },
            { { 1, -1, -1 }, { 0, 0 }, norm_south, col_south },
            { { 1, -1, 1 }, { 1, 0 }, norm_south, col_south },
            { { -1, -1, -1 }, { 0, 1 }, norm_south, col_south },
            { { -1, -1, 1 }, { 1, 1 }, norm_south, col_south },
        };
        vbuf->writeData(vdata.vertexStart, vdata.vertexCount*vdecl_size, &vdata_raw[0]);

        // Instanced data (buffer source 1): uv1..uv3 carry the 3x3 rotation, uv4 the position
        // could reduce bus bandwidth by 25% by keeping pos in a separate buffer, thus updating only rotation each frame, but not doing that
        unsigned vdecl_inst_size = 0;
        vdecl_inst_size += vdata.vertexDeclaration->addElement(1, vdecl_inst_size, Ogre::VET_FLOAT3, Ogre::VES_TEXTURE_COORDINATES, 1).getSize();
        vdecl_inst_size += vdata.vertexDeclaration->addElement(1, vdecl_inst_size, Ogre::VET_FLOAT3, Ogre::VES_TEXTURE_COORDINATES, 2).getSize();
        vdecl_inst_size += vdata.vertexDeclaration->addElement(1, vdecl_inst_size, Ogre::VET_FLOAT3, Ogre::VES_TEXTURE_COORDINATES, 3).getSize();
        vdecl_inst_size += vdata.vertexDeclaration->addElement(1, vdecl_inst_size, Ogre::VET_FLOAT3, Ogre::VES_TEXTURE_COORDINATES, 4).getSize();
        struct VDataInstRaw { Ogre::Matrix3 rot; Ogre::Vector3 pos; }; // careful with padding here, all prims are 4 bytes long
        // Same layout requirement as for VDataRaw: uploaded as vdecl_inst_size bytes per instance.
        assert(sizeof(VDataInstRaw) == vdecl_inst_size);
        // NOTE(review): the vertex shader builds its mat3 from uv1..uv3 as columns; if Ogre::Matrix3
        // is stored row by row the shader sees the transpose of `rot` -- harmless here, but confirm.
        Ogre::HardwareVertexBufferSharedPtr vbuf_inst =
            Ogre::HardwareBufferManager::getSingleton().createVertexBuffer(
                vdecl_inst_size, num_cubes, Ogre::HardwareBuffer::HBU_DYNAMIC_WRITE_ONLY);
        vbuf_inst->setIsInstanceData(true);
        vbuf_inst->setInstanceDataStepRate(1);
        vdata.vertexBufferBinding->setBinding(1, vbuf_inst);

        // CPU-side copy of the instance buffer; lay the cubes out on a num_cols-wide grid at z = -250,
        // each with a random initial rotation about the z axis
        VDataInstRaw vdata_inst_raw[num_cubes];
        for (unsigned i=0 ; i<num_cubes ; ++i) {
            int row = num_cols/2 - i / num_cols;
            int col = i % num_cols - (num_cols/2);
            vdata_inst_raw[i].pos = Ogre::Vector3(col*3.0, row*3.0, -250);
            Ogre::Quaternion(Ogre::Degree(360.0f * rand()/float(RAND_MAX)), Ogre::Vector3(0,0,1)).ToRotationMatrix(vdata_inst_raw[i].rot);
        }
        vbuf_inst->writeData(0, num_cubes*vdecl_inst_size, &vdata_inst_raw[0]);

        // Prepare index buffer
        Ogre::IndexData idata;
        idata.indexBuffer = Ogre::HardwareBufferManager::getSingleton()
            .createIndexBuffer(Ogre::HardwareIndexBuffer::IT_16BIT, 3*triangles, Ogre::HardwareBuffer::HBU_DYNAMIC_WRITE_ONLY);
        idata.indexStart = 0;
        idata.indexCount = 3*triangles;
        uint16_t idata_raw[3*triangles];
        for (int i=0 ; i<6 ; ++i) { // 6 cube faces, two triangles (0,1,2) and (1,3,2) per face
            idata_raw[i*6 + 0] = 4*i + 0;
            idata_raw[i*6 + 1] = 4*i + 1;
            idata_raw[i*6 + 2] = 4*i + 2;
            idata_raw[i*6 + 3] = 4*i + 1;
            idata_raw[i*6 + 4] = 4*i + 3;
            idata_raw[i*6 + 5] = 4*i + 2;
        }
        idata.indexBuffer->writeData(idata.indexStart, idata.indexCount*sizeof(uint16_t), &idata_raw[0]);

        // Prepare texture (8 bit mono)
        uint8_t *raw_tex = new uint8_t[512*512];
        for (unsigned y=0 ; y<512 ; ++y) {
            for (unsigned x=0 ; x<512 ; ++x) {
                float x_ = (float(x)-256)/256;
                float y_ = (float(y)-256)/256;
                // compute a full-intensity circle that fades towards its boundary;
                // the result is clamped to [0.5, 1] so it never gets darker than half intensity
                float intensity = (2-sqrtf(x_*x_*2 + y_*y_*2));
                intensity = intensity > 1 ? 1 : intensity;
                intensity = intensity < 0.5 ? 0.5 : intensity;
                raw_tex[y*512+x] = intensity * 255;
            }
        }

        // Load raw byte array into an Image
        Ogre::Image img;
        {
            // The stream only wraps raw_tex (it is not asked to free it); keep it in its own
            // scope so that nothing refers to raw_tex any more once the array is released below.
            Ogre::DataStreamPtr raw_tex_ptr = Ogre::DataStreamPtr(new Ogre::MemoryDataStream(raw_tex,512*512*1));
            img.loadRawData(raw_tex_ptr, 512, 512, 1, Ogre::PF_L8, 1, 0);
        }

        // Create texture based on img
        Ogre::TexturePtr tex = Ogre::TextureManager::getSingleton().loadImage("MyTexture", "General", img);

        // loadRawData() read the pixels into the Image's own storage, so the staging array
        // can be released now (previously it was leaked)
        delete [] raw_tex;
        raw_tex = NULL;

        // Initialise vertex program
        Ogre::HighLevelGpuProgramPtr vp = Ogre::HighLevelGpuProgramManager::getSingleton()
            .createProgram("MyVertexProgram", "General", "glsl", Ogre::GPT_VERTEX_PROGRAM);
        const char *vertex_program_code =
            "#version 130\n"
            "uniform mat4x4 proj;\n"
            "in vec3 vertex;\n" // these 'in' names are used by ogre to determine the semantic -- do not change them
            "in vec2 uv0;\n"
            "in vec3 uv1;\n"
            "in vec3 uv2;\n"
            "in vec3 uv3;\n"
            "in vec3 uv4;\n"
            "in vec3 normal;\n"
            "in vec4 colour;\n"
            "out vec4 interp_colour;\n"
            "out vec2 interp_uv;\n"
            "out vec3 interp_normal;\n"
            "void main()\n"
            "{\n"
            " mat3x3 instance_rot = mat3x3(uv1, uv2, uv3);\n"
            " vec3 instance_pos = uv4;\n"
            " gl_Position = proj * vec4(instance_rot * vertex + instance_pos, 1);\n"
            " interp_colour = colour;\n"
            " interp_uv = uv0;\n"
            " interp_normal = instance_rot * normal;\n"
            "}\n";
        vp->setSource(vertex_program_code);
        vp->load();

        // Initialise fragment program
        Ogre::HighLevelGpuProgramPtr fp = Ogre::HighLevelGpuProgramManager::getSingleton()
            .createProgram("MyFragmentProgram", "General", "glsl", Ogre::GPT_FRAGMENT_PROGRAM);
        const char *fragment_program_code =
            "#version 130\n"
            "uniform sampler2D tex;\n"
            "uniform vec3 light_dir;\n"
            "in vec4 interp_colour;\n"
            "in vec2 interp_uv;\n"
            "in vec3 interp_normal;\n"
            "out vec3 fb_colour;\n"
            "void main()\n"
            "{\n"
            " vec3 colour = interp_colour.rgb * texture2D(tex, interp_uv).rgb;\n"
            " float illumination = 0.5 + 0.5*dot(light_dir, normalize(interp_normal));\n"
            " fb_colour = colour * illumination;\n"
            "}\n";
        fp->setSource(fragment_program_code);
        fp->load();

        // Set up program parameters
        Ogre::GpuProgramParametersSharedPtr vertex_params = vp->createParameters();
        Ogre::GpuProgramParametersSharedPtr fragment_params = fp->createParameters();
        Ogre::Matrix4 proj;
        rs->_makeProjectionMatrix (Ogre::Degree(45), 1.0f, 0.2f, 800.0f, proj, true);
        vertex_params->setNamedConstant("proj", proj); // no view matrix -- camera fixed
        fragment_params->setNamedConstant("light_dir", Ogre::Vector3(-1.0f, 1.0f, 10.0f).normalisedCopy());

        // Initialise cpu data (for animating scene)
        Ogre::Matrix3 rot_velocities[num_cubes];
        for (unsigned i=0 ; i<num_cubes ; ++i) {
            // random rotational velocity -- 4 degrees per frame about a random axis
            Ogre::Vector3 axis;
            do {
                // a uniformly distributed random vector to somewhere in the cube [-1, 1];
                // retry until it lies inside the unit sphere and is not too close to zero
                axis = Ogre::Vector3(rand() / float(RAND_MAX),
                                     rand() / float(RAND_MAX),
                                     rand() / float(RAND_MAX)) * 2 - 1;
            } while (axis.squaredLength() > 1 || axis.squaredLength() < 0.0001);
            axis.normalise();
            // axis is now a random vector uniformly distributed on the surface of the unit sphere
            Ogre::Quaternion(Ogre::Degree(4), axis).ToRotationMatrix(rot_velocities[i]);
        }

        Ogre::Timer timer;

        // Rendering loop
        while (!clicked_close) {

            // handle window resizes etc
            Ogre::WindowEventUtilities::messagePump();

            // Tell rs where to render (necessary since resize may have changed things since last frame)
            rs->_setViewport(ogre_vp);

            // animate scene: apply each cube's per-frame rotation, then re-upload the whole instance buffer
            for (unsigned i=0 ; i<num_cubes ; ++i) {
                vdata_inst_raw[i].rot = rot_velocities[i] * vdata_inst_raw[i].rot;
            }
            vbuf_inst->writeData(0, num_cubes*vdecl_inst_size, &vdata_inst_raw[0]);

            // Only render while the window is active.
            // NOTE(review): the original comment here read "necessary for memory leak" -- presumably
            // rendering to an inactive window leaks memory; not verified.
            if (ogre_win->isActive()) {

                rs->_beginFrame();

                rs->clearFrameBuffer(Ogre::FBT_COLOUR | Ogre::FBT_DEPTH | Ogre::FBT_STENCIL, Ogre::ColourValue(0, 0, 0.5));

                // material settings for cubes
                rs->_setCullingMode(Ogre::CULL_CLOCKWISE);
                rs->_setDepthBufferParams(true, true, Ogre::CMPF_LESS_EQUAL);
                rs->_setTexture(0, true, tex);
                Ogre::TextureUnitState::UVWAddressingMode addr_mode = {
                    Ogre::TextureUnitState::TAM_WRAP,
                    Ogre::TextureUnitState::TAM_WRAP,
                    Ogre::TextureUnitState::TAM_WRAP
                };
                rs->_setTextureAddressingMode(0, addr_mode);
                rs->_setTextureLayerAnisotropy(0, 16);
                rs->_setTextureUnitFiltering(0, Ogre::FO_ANISOTROPIC, Ogre::FO_ANISOTROPIC, Ogre::FO_LINEAR);
                rs->bindGpuProgram(vp->_getBindingDelegate());
                rs->bindGpuProgram(fp->_getBindingDelegate());
                rs->bindGpuProgramParameters(Ogre::GPT_FRAGMENT_PROGRAM, fragment_params, Ogre::GPV_ALL);
                rs->bindGpuProgramParameters(Ogre::GPT_VERTEX_PROGRAM, vertex_params, Ogre::GPV_ALL);

                // render the instances: one draw call covers every cube
                Ogre::RenderOperation op;
                op.useIndexes = true;
                op.vertexData = &vdata;
                op.indexData = &idata;
                op.operationType = Ogre::RenderOperation::OT_TRIANGLE_LIST;
                op.numberOfInstances = num_cubes; // must match the size of the instance buffer
                rs->_render(op);

                rs->_endFrame();

                // display rendered frame, vsync param ignored here, at least in gl
                rs->_swapAllRenderTargetBuffers(true);

                // frame rate derived from the time since the previous rendered frame
                std::cout << "FPS: " << 1/ (timer.getMicroseconds() / 1E6) << std::endl;
                timer.reset();

            } else {
                // nothing to draw: back off for 10 ms instead of spinning
                mysleep(10000);
            }
        }

    } catch (const Ogre::Exception &e) {
        std::cerr << e.getFullDescription() << std::endl;
        // report the failure to the caller instead of pretending the run succeeded
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
// vim: shiftwidth=4:tabstop=4:expandtab