For ray picking rendered objects (like by mouse) the best option is to use the already rendered buffers as there is very little cost of reading them in comparison to ray intersection tests on complex scene. The idea is to render each pick-able rendered object to separate buffer per each info you need about them for example like this:
-
Depth buffer
this will give you the 3D position of the ray intersection with object.
-
Stencil buffer
if each object rendered to stencil with its ID (or its index in object list) then you can get the picked object directly.
-
any other
there are also secondary color attachments and FBO’s out there. So you can add any other stuff like normal vector or what ever you need.
If coded right all of this will reduce performance only slightly (even not at all) as you do not need to compute anything its just a single write per fragment per buffer.
The picking itself is easy you just read the corresponding pixel from all the buffers you need and convert to wanted format.
Here simple C++/VCL example using fixed pipeline (no shaders)…
//---------------------------------------------------------------------------
#include <vcl.h>
#include <math.h>
#pragma hdrstop
#include "Unit1.h"
#include "gl_simple.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
void matrix_mul_vector(double *c,double *a,double *b,double w=1.0)
{
double q[3];
q[0]=(a[ 0]*b[0])+(a[ 4]*b[1])+(a[ 8]*b[2])+(a[12]*w);
q[1]=(a[ 1]*b[0])+(a[ 5]*b[1])+(a[ 9]*b[2])+(a[13]*w);
q[2]=(a[ 2]*b[0])+(a[ 6]*b[1])+(a[10]*b[2])+(a[14]*w);
for(int i=0;i<3;i++) c[i]=q[i];
}
//---------------------------------------------------------------------------
class glMouse
{
public:
int sx,sy; // framebuffer position [pixels]
double pos[3]; // [GCS] ray end coordinate (or z_far)
double beg[3]; // [GCS] ray start (z_near)
double dir[3]; // [GCS] ray direction
double depth; // [GCS] perpendicular distance to camera
WORD id; // selected object id
double x0,y0,xs,ys,zFar,zNear; // viewport and projection
double *eye; // camera direct matrix pointer
double fx,fy; // perspective scales
glMouse(){ eye=NULL; for (int i=0;i<3;i++) { pos[i]=0.0; beg[i]=0.0; dir[i]=0.0; } id=0; x0=0.0; y0=0.0; xs=0.0; ys=0.0; fx=0.0; fy=0.0; depth=0.0; }
glMouse(glMouse& a){ *this=a; };
~glMouse(){};
glMouse* operator = (const glMouse *a) { *this=*a; return this; };
// glMouse* operator = (const glMouse &a) { ...copy... return this; };
void resize(double _x0,double _y0,double _xs,double _ys,double *_eye)
{
double per[16];
x0=_x0; y0=_y0; xs=_xs; ys=_ys; eye=_eye;
glGetDoublev(GL_PROJECTION_MATRIX,per);
zFar =0.5*per[14]*(1.0-((per[10]-1.0)/(per[10]+1.0)));
zNear=zFar*(per[10]+1.0)/(per[10]-1.0);
fx=per[0];
fy=per[5];
}
void pick(double x,double y) // test screen x,y [pixels] position
{
int i;
double l;
GLfloat _z;
GLint _id;
sx=x; sy=ys-1.0-y;
// read depth z and linearize
glReadPixels(sx,sy,1,1,GL_DEPTH_COMPONENT,GL_FLOAT,&_z);// read depth value
depth=_z; // logarithmic
depth=(2.0*depth)-1.0; // logarithmic NDC
z=(2.0*zNear*zFar)/(zFar+zNear-(z*(zFar-zNear))); // linear <zNear,zFar>
// read object ID
glReadPixels(sx,sy,1,1,GL_STENCIL_INDEX,GL_INT,&_id); // read stencil value
id=_id;
// win [pixel] -> GL NDC <-1,+1>
x= (2.0*(x-x0)/xs)-1.0;
y=1.0-(2.0*(y-y0)/ys);
// ray start GL camera [LCS]
beg[2]=-zNear;
beg[1]=(-beg[2]/fy)*y;
beg[0]=(-beg[2]/fx)*x;
// ray direction GL camera [LCS]
for (l=0.0,i=0;i<3;i++) l+=beg[i]*beg[i]; l=1.0/sqrt(l);
for (i=0;i<3;i++) dir[0]=beg[0]*l;
// ray end GL camera [LCS]
pos[2]=-depth;
pos[1]=(-pos[2]/fy)*y;
pos[0]=(-pos[2]/fx)*x;
// convert to [GCS]
matrix_mul_vector(beg,eye,beg);
matrix_mul_vector(pos,eye,pos);
matrix_mul_vector(dir,eye,dir,0.0);
}
};
//---------------------------------------------------------------------------
// camera & mouse
double eye[16],ieye[16]; // direct view,inverse view and perspective matrices
glMouse mouse;
// objects
struct object
{
WORD id; // unique non zero ID
double m[16]; // direct model matrix
object(){}; object(object& a){ *this=a; }; ~object(){}; object* operator = (const object *a) { *this=*a; return this; }; /*object* operator = (const object &a) { ...copy... return this; };*/
};
const int objs=7;
object obj[objs];
// textures
GLuint txr=-1;
//---------------------------------------------------------------------------
void matrix_inv(double *a,double *b) // a[16] = Inverse(b[16])
{
double x,y,z;
// transpose of rotation matrix
a[ 0]=b[ 0];
a[ 5]=b[ 5];
a[10]=b[10];
x=b[1]; a[1]=b[4]; a[4]=x;
x=b[2]; a[2]=b[8]; a[8]=x;
x=b[6]; a[6]=b[9]; a[9]=x;
// copy projection part
a[ 3]=b[ 3];
a[ 7]=b[ 7];
a[11]=b[11];
a[15]=b[15];
// convert origin: new_pos = - new_rotation_matrix * old_pos
x=(a[ 0]*b[12])+(a[ 4]*b[13])+(a[ 8]*b[14]);
y=(a[ 1]*b[12])+(a[ 5]*b[13])+(a[ 9]*b[14]);
z=(a[ 2]*b[12])+(a[ 6]*b[13])+(a[10]*b[14]);
a[12]=-x;
a[13]=-y;
a[14]=-z;
}
//---------------------------------------------------------------------------
void gl_draw()
{
int i; object *o;
double a;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT );
glEnable(GL_CULL_FACE);
glEnable(GL_DEPTH_TEST);
glEnable(GL_STENCIL_TEST);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilMask(0xFFFF); // Write to stencil buffer
glStencilFunc(GL_ALWAYS,0,0xFFFF); // Set any stencil to 0
for (o=obj,i=0;i<objs;i++,o++)
{
glMatrixMode(GL_MODELVIEW);
glLoadMatrixd(ieye);
glMultMatrixd(o->m);
glStencilFunc(GL_ALWAYS,o->id,0xFFFF); // Set any stencil to object ID
vao_draw();
}
glStencilFunc(GL_ALWAYS,0,0xFFFF); // Set any stencil to 0
glDisable(GL_STENCIL_TEST); // no need fot testing
// render mouse
glMatrixMode(GL_MODELVIEW);
glLoadMatrixd(ieye);
a=0.1*mouse.depth;
glColor3f(0.0,1.0,0.0);
glBegin(GL_LINES);
glVertex3d(mouse.pos[0]+a,mouse.pos[1],mouse.pos[2]);
glVertex3d(mouse.pos[0]-a,mouse.pos[1],mouse.pos[2]);
glVertex3d(mouse.pos[0],mouse.pos[1]+a,mouse.pos[2]);
glVertex3d(mouse.pos[0],mouse.pos[1]-a,mouse.pos[2]);
glVertex3d(mouse.pos[0],mouse.pos[1],mouse.pos[2]+a);
glVertex3d(mouse.pos[0],mouse.pos[1],mouse.pos[2]-a);
glEnd();
Form1->Caption=AnsiString().sprintf("%.3lf , %.3lf , %.3lf : %u",mouse.pos[0],mouse.pos[1],mouse.pos[2],mouse.id);
// debug buffer views
if ((Form1->ck_depth->Checked)||(Form1->ck_stencil->Checked))
{
glDisable(GL_DEPTH_TEST);
glMatrixMode(GL_PROJECTION);
glPushMatrix();
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D,txr);
GLfloat *f=new GLfloat[xs*ys],z;
if (Form1->ck_depth ->Checked)
{
glReadPixels(0,0,xs,ys,GL_DEPTH_COMPONENT,GL_FLOAT,f);
for (i=0;i<xs*ys;i++) f[i]=1.0-(2.0*mouse.zNear)/(mouse.zFar+mouse.zNear-(((2.0*f[i])-1.0)*(mouse.zFar-mouse.zNear)));
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, xs, ys, 0, GL_RED, GL_FLOAT, f);
}
if (Form1->ck_stencil->Checked)
{
glReadPixels(0,0,xs,ys,GL_STENCIL_INDEX,GL_FLOAT,f);
for (i=0;i<xs*ys;i++) f[i]/=float(objs);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, xs, ys, 0, GL_GREEN, GL_FLOAT, f);
}
delete[] f;
glColor3f(1.0,1.0,1.0);
glBegin(GL_QUADS);
glTexCoord2f(1.0,0.0); glVertex2f(+1.0,-1.0);
glTexCoord2f(1.0,1.0); glVertex2f(+1.0,+1.0);
glTexCoord2f(0.0,1.0); glVertex2f(-1.0,+1.0);
glTexCoord2f(0.0,0.0); glVertex2f(-1.0,-1.0);
glEnd();
glMatrixMode(GL_PROJECTION);
glPopMatrix();
glDisable(GL_TEXTURE_2D);
glEnable(GL_DEPTH_TEST);
}
glFlush();
SwapBuffers(hdc);
}
//---------------------------------------------------------------------------
__fastcall TForm1::TForm1(TComponent* Owner):TForm(Owner)
{
int i;
object *o;
gl_init(Handle);
vao_init();
// init textures
glGenTextures(1,&txr);
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D,txr);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S,GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T,GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER,GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,GL_NEAREST);
glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE,GL_COPY);
glDisable(GL_TEXTURE_2D);
// init objects
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glTranslatef(-1.5,4.7,-8.0);
for (o=obj,i=0;i<objs;i++,o++)
{
o->id=i+1; // unique non zero ID
glGetDoublev(GL_MODELVIEW_MATRIX,o->m);
glRotatef(360.0/float(objs),0.0,0.0,1.0);
glTranslatef(-3.0,0.0,0.0);
}
for (o=obj,i=0;i<objs;i++,o++)
{
glLoadMatrixd(o->m);
glRotatef(180.0*Random(),Random(),Random(),Random());
glGetDoublev(GL_MODELVIEW_MATRIX,o->m);
}
}
//---------------------------------------------------------------------------
void __fastcall TForm1::FormDestroy(TObject *Sender)
{
glDeleteTextures(1,&txr);
gl_exit();
vao_exit();
}
//---------------------------------------------------------------------------
void __fastcall TForm1::FormResize(TObject *Sender)
{
gl_resize(ClientWidth,ClientHeight);
// obtain/init matrices
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glTranslatef(0,0,-15.0);
glGetDoublev(GL_MODELVIEW_MATRIX,ieye);
matrix_inv(eye,ieye);
mouse.resize(0,0,xs,ys,eye);
}
//---------------------------------------------------------------------------
void __fastcall TForm1::FormPaint(TObject *Sender)
{
gl_draw();
}
//---------------------------------------------------------------------------
void __fastcall TForm1::Timer1Timer(TObject *Sender)
{
gl_draw();
}
//---------------------------------------------------------------------------
void __fastcall TForm1::FormMouseWheel(TObject *Sender, TShiftState Shift, int WheelDelta, TPoint &MousePos, bool &Handled)
{
GLfloat dz=2.0;
if (WheelDelta<0) dz=-dz;
glMatrixMode(GL_MODELVIEW);
glLoadMatrixd(ieye);
glTranslatef(0,0,dz);
glGetDoublev(GL_MODELVIEW_MATRIX,ieye);
matrix_inv(eye,ieye);
gl_draw();
}
//---------------------------------------------------------------------------
void __fastcall TForm1::FormMouseMove(TObject *Sender, TShiftState Shift, int X, int Y)
{
mouse.pick(X,Y);
}
//---------------------------------------------------------------------------
void __fastcall TForm1::ck_depthClick(TObject *Sender)
{
gl_draw();
}
//---------------------------------------------------------------------------
Here preview of from left RGB,Depth,Stencil:
Here captured GIF:
the first 3 numbers are the 3D position of picked pixel in [GCS]
and the last number in caption is the picked ID where 0
means no object.
The example is using gl_simple,h
from here:
- simple complete GL+VAO/VBO+GLSL+shaders example in C++
You can ignore the VCL stuff as its not important just port the events to your environment…
So what to do:
-
rendering
You need add stencil buffer to your GL window pixel format so in my case I just add:
pfd.cStencilBits = 16;
into
gl_init()
function fromgl_simple.h
. Also add its bit intoglClear
and set each objects stencil to its ID Like I did ingl_draw()
. -
picking
I wrote a small
glMouse
class that do all the heavy lifting. On each change of perspective, view, or viewport call itsglMouse::resize
function. That will prepare all the constants needed for the computations later. Beware it needs direct camera/view matrix !!!Now on each mouse movement (or click or whatever) call the
glMouse::pick
function and then use the results likeid
which will return the ID picked object was rendered with orpos
which is the 3D coordinate in global world coordinates ([GCS]
) of the ray object intersection.The function just read the depth and stencil buffers. Linearize depth like here:
- depth buffer got by glReadPixels is always 1
and compute the ray
beg,dir,pos,depth
in[GCS]
. -
Normal
You got 2 options either render your normal as another buffer which is the simplest and most precise. Or read depths of 2 or more neighboring pixels around picked one compute their 3D positions. From that using cross product compute you normal(s) and average if needed. But this can lead to artifacts on edges.
As mentioned in the comments to boost accuracy you should use linear depth buffer instead of linearized logarithmic like this:
- Linear depth buffer
Btw I used the same technique in here (in GDI based SW isometric render):
- Improving performance of click detection on a staggered column isometric grid
[Edit1] 8bit stencil buffer
Well these days the reliable stencil bitwidth is only 8bit which limits the number of ids to 255. That is in most cases not enough. A workaround is to render the indexes as colors then store the frame into CPU memory and then render colors normaly. Then when needed using the stored frame for picking. Rendering to texture or color attachment is also a possibility.
[Edit2] some related links
- objects moving with mouse
- objects moving and orienting with mouse