OpenGL 3D-raypicking with high poly meshes

For ray picking rendered objects (like by mouse) the best option is to use the already rendered buffers as there is very little cost of reading them in comparison to ray intersection tests on complex scene. The idea is to render each pick-able rendered object to separate buffer per each info you need about them for example like this:

  1. Depth buffer

    this will give you the 3D position of the ray intersection with object.

  2. Stencil buffer

    if each object rendered to stencil with its ID (or its index in object list) then you can get the picked object directly.

  3. any other

    there are also secondary color attachments and FBO’s out there. So you can add any other stuff like normal vector or what ever you need.

If coded right all of this will reduce performance only slightly (even not at all) as you do not need to compute anything its just a single write per fragment per buffer.

The picking itself is easy you just read the corresponding pixel from all the buffers you need and convert to wanted format.

Here simple C++/VCL example using fixed pipeline (no shaders)…

//---------------------------------------------------------------------------
#include <vcl.h>
#include <math.h>
#pragma hdrstop
#include "Unit1.h"
#include "gl_simple.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
void matrix_mul_vector(double *c,double *a,double *b,double w=1.0)
    {
    double q[3];
    q[0]=(a[ 0]*b[0])+(a[ 4]*b[1])+(a[ 8]*b[2])+(a[12]*w);
    q[1]=(a[ 1]*b[0])+(a[ 5]*b[1])+(a[ 9]*b[2])+(a[13]*w);
    q[2]=(a[ 2]*b[0])+(a[ 6]*b[1])+(a[10]*b[2])+(a[14]*w);
    for(int i=0;i<3;i++) c[i]=q[i];
    }
//---------------------------------------------------------------------------
class glMouse
    {
public:
    int sx,sy;      // framebuffer position [pixels]
    double pos[3];  // [GCS] ray end coordinate (or z_far)
    double beg[3];  // [GCS] ray start (z_near)
    double dir[3];  // [GCS] ray direction
    double depth;   // [GCS] perpendicular distance to camera
    WORD id;        // selected object id
    double x0,y0,xs,ys,zFar,zNear;  // viewport and projection
    double *eye;    // camera direct matrix pointer
    double fx,fy;   // perspective scales

    glMouse(){ eye=NULL; for (int i=0;i<3;i++) { pos[i]=0.0; beg[i]=0.0; dir[i]=0.0; } id=0; x0=0.0; y0=0.0; xs=0.0; ys=0.0; fx=0.0; fy=0.0; depth=0.0; }
    glMouse(glMouse& a){ *this=a; };
    ~glMouse(){};
    glMouse* operator = (const glMouse *a) { *this=*a; return this; };
//  glMouse* operator = (const glMouse &a) { ...copy... return this; };

    void resize(double _x0,double _y0,double _xs,double _ys,double *_eye)
        {
        double per[16];
        x0=_x0; y0=_y0; xs=_xs; ys=_ys; eye=_eye;
        glGetDoublev(GL_PROJECTION_MATRIX,per);
        zFar =0.5*per[14]*(1.0-((per[10]-1.0)/(per[10]+1.0)));
        zNear=zFar*(per[10]+1.0)/(per[10]-1.0);
        fx=per[0];
        fy=per[5];
        }

    void pick(double x,double y)    // test screen x,y [pixels] position
        {
        int i;
        double l;
        GLfloat _z;
        GLint _id;
        sx=x; sy=ys-1.0-y;
        // read depth z and linearize
        glReadPixels(sx,sy,1,1,GL_DEPTH_COMPONENT,GL_FLOAT,&_z);// read depth value
        depth=_z;                                               // logarithmic
        depth=(2.0*depth)-1.0;                                  // logarithmic NDC
        z=(2.0*zNear*zFar)/(zFar+zNear-(z*(zFar-zNear)));       // linear <zNear,zFar>
        
        // read object ID
        glReadPixels(sx,sy,1,1,GL_STENCIL_INDEX,GL_INT,&_id);   // read stencil value
        id=_id;
        // win [pixel] -> GL NDC <-1,+1>
        x=    (2.0*(x-x0)/xs)-1.0;
        y=1.0-(2.0*(y-y0)/ys);
        // ray start GL camera [LCS]
        beg[2]=-zNear;
        beg[1]=(-beg[2]/fy)*y;
        beg[0]=(-beg[2]/fx)*x;
        // ray direction GL camera [LCS]
        for (l=0.0,i=0;i<3;i++) l+=beg[i]*beg[i]; l=1.0/sqrt(l);
        for (i=0;i<3;i++) dir[0]=beg[0]*l;
        // ray end GL camera [LCS]
        pos[2]=-depth;
        pos[1]=(-pos[2]/fy)*y;
        pos[0]=(-pos[2]/fx)*x;
        // convert to [GCS]
        matrix_mul_vector(beg,eye,beg);
        matrix_mul_vector(pos,eye,pos);
        matrix_mul_vector(dir,eye,dir,0.0);
        }
    };
//---------------------------------------------------------------------------
// camera & mouse
double eye[16],ieye[16];    // direct view,inverse view and perspective matrices
glMouse mouse;
// objects
struct object
    {
    WORD id;                // unique non zero ID
    double m[16];           // direct model matrix
    object(){}; object(object& a){ *this=a; }; ~object(){}; object* operator = (const object *a) { *this=*a; return this; }; /*object* operator = (const object &a) { ...copy... return this; };*/
    };
const int objs=7;
object obj[objs];
// textures
GLuint txr=-1;
//---------------------------------------------------------------------------
void  matrix_inv(double *a,double *b) // a[16] = Inverse(b[16])
    {
    double x,y,z;
    // transpose of rotation matrix
    a[ 0]=b[ 0];
    a[ 5]=b[ 5];
    a[10]=b[10];
    x=b[1]; a[1]=b[4]; a[4]=x;
    x=b[2]; a[2]=b[8]; a[8]=x;
    x=b[6]; a[6]=b[9]; a[9]=x;
    // copy projection part
    a[ 3]=b[ 3];
    a[ 7]=b[ 7];
    a[11]=b[11];
    a[15]=b[15];
    // convert origin: new_pos = - new_rotation_matrix * old_pos
    x=(a[ 0]*b[12])+(a[ 4]*b[13])+(a[ 8]*b[14]);
    y=(a[ 1]*b[12])+(a[ 5]*b[13])+(a[ 9]*b[14]);
    z=(a[ 2]*b[12])+(a[ 6]*b[13])+(a[10]*b[14]);
    a[12]=-x;
    a[13]=-y;
    a[14]=-z;
    }
//---------------------------------------------------------------------------
void gl_draw()
    {
    int i; object *o;
    double a;

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT );
    glEnable(GL_CULL_FACE);
    glEnable(GL_DEPTH_TEST);

    glEnable(GL_STENCIL_TEST);
    glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
    glStencilMask(0xFFFF); // Write to stencil buffer
    glStencilFunc(GL_ALWAYS,0,0xFFFF);  // Set any stencil to 0

    for (o=obj,i=0;i<objs;i++,o++)
        {
        glMatrixMode(GL_MODELVIEW);
        glLoadMatrixd(ieye);
        glMultMatrixd(o->m);
        glStencilFunc(GL_ALWAYS,o->id,0xFFFF); // Set any stencil to object ID
        vao_draw();
        }
    glStencilFunc(GL_ALWAYS,0,0xFFFF);  // Set any stencil to 0
    glDisable(GL_STENCIL_TEST);         // no need fot testing

    // render mouse
    glMatrixMode(GL_MODELVIEW);
    glLoadMatrixd(ieye);

    a=0.1*mouse.depth;
    glColor3f(0.0,1.0,0.0);
    glBegin(GL_LINES);
    glVertex3d(mouse.pos[0]+a,mouse.pos[1],mouse.pos[2]);
    glVertex3d(mouse.pos[0]-a,mouse.pos[1],mouse.pos[2]);
    glVertex3d(mouse.pos[0],mouse.pos[1]+a,mouse.pos[2]);
    glVertex3d(mouse.pos[0],mouse.pos[1]-a,mouse.pos[2]);
    glVertex3d(mouse.pos[0],mouse.pos[1],mouse.pos[2]+a);
    glVertex3d(mouse.pos[0],mouse.pos[1],mouse.pos[2]-a);
    glEnd();

    Form1->Caption=AnsiString().sprintf("%.3lf , %.3lf , %.3lf : %u",mouse.pos[0],mouse.pos[1],mouse.pos[2],mouse.id);

    // debug buffer views
    if ((Form1->ck_depth->Checked)||(Form1->ck_stencil->Checked))
        {
        glDisable(GL_DEPTH_TEST);
        glMatrixMode(GL_PROJECTION);
        glPushMatrix();
        glLoadIdentity();
        glMatrixMode(GL_MODELVIEW);
        glLoadIdentity();
        glEnable(GL_TEXTURE_2D);
        glBindTexture(GL_TEXTURE_2D,txr);
        GLfloat *f=new GLfloat[xs*ys],z;
        if (Form1->ck_depth  ->Checked)
            {
            glReadPixels(0,0,xs,ys,GL_DEPTH_COMPONENT,GL_FLOAT,f);
            for (i=0;i<xs*ys;i++) f[i]=1.0-(2.0*mouse.zNear)/(mouse.zFar+mouse.zNear-(((2.0*f[i])-1.0)*(mouse.zFar-mouse.zNear)));
            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, xs, ys, 0, GL_RED, GL_FLOAT, f);
            }
        if (Form1->ck_stencil->Checked)
            {
            glReadPixels(0,0,xs,ys,GL_STENCIL_INDEX,GL_FLOAT,f);
            for (i=0;i<xs*ys;i++) f[i]/=float(objs);
            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, xs, ys, 0, GL_GREEN, GL_FLOAT, f);
            }
        delete[] f;
        glColor3f(1.0,1.0,1.0);
        glBegin(GL_QUADS);
        glTexCoord2f(1.0,0.0); glVertex2f(+1.0,-1.0);
        glTexCoord2f(1.0,1.0); glVertex2f(+1.0,+1.0);
        glTexCoord2f(0.0,1.0); glVertex2f(-1.0,+1.0);
        glTexCoord2f(0.0,0.0); glVertex2f(-1.0,-1.0);
        glEnd();
        glMatrixMode(GL_PROJECTION);
        glPopMatrix();
        glDisable(GL_TEXTURE_2D);
        glEnable(GL_DEPTH_TEST);
        }
    glFlush();
    SwapBuffers(hdc);
    }
//---------------------------------------------------------------------------
__fastcall TForm1::TForm1(TComponent* Owner):TForm(Owner)
    {
    int i;
    object *o;

    gl_init(Handle);
    vao_init();

    // init textures
    glGenTextures(1,&txr);
    glEnable(GL_TEXTURE_2D);
    glBindTexture(GL_TEXTURE_2D,txr);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S,GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T,GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER,GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,GL_NEAREST);
    glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE,GL_COPY);
    glDisable(GL_TEXTURE_2D);

    // init objects
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    glTranslatef(-1.5,4.7,-8.0);
    for (o=obj,i=0;i<objs;i++,o++)
        {
        o->id=i+1;  // unique non zero ID
        glGetDoublev(GL_MODELVIEW_MATRIX,o->m);
        glRotatef(360.0/float(objs),0.0,0.0,1.0);
        glTranslatef(-3.0,0.0,0.0);
        }
    for (o=obj,i=0;i<objs;i++,o++)
        {
        glLoadMatrixd(o->m);
        glRotatef(180.0*Random(),Random(),Random(),Random());
        glGetDoublev(GL_MODELVIEW_MATRIX,o->m);
        }
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::FormDestroy(TObject *Sender)
    {
    glDeleteTextures(1,&txr);
    gl_exit();
    vao_exit();
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::FormResize(TObject *Sender)
    {
    gl_resize(ClientWidth,ClientHeight);
    // obtain/init matrices
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    glTranslatef(0,0,-15.0);
    glGetDoublev(GL_MODELVIEW_MATRIX,ieye);
    matrix_inv(eye,ieye);
    mouse.resize(0,0,xs,ys,eye);
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::FormPaint(TObject *Sender)
    {
    gl_draw();
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::Timer1Timer(TObject *Sender)
    {
    gl_draw();
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::FormMouseWheel(TObject *Sender, TShiftState Shift, int WheelDelta, TPoint &MousePos, bool &Handled)
    {
    GLfloat dz=2.0;
    if (WheelDelta<0) dz=-dz;
    glMatrixMode(GL_MODELVIEW);
    glLoadMatrixd(ieye);
    glTranslatef(0,0,dz);
    glGetDoublev(GL_MODELVIEW_MATRIX,ieye);
    matrix_inv(eye,ieye);
    gl_draw();
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::FormMouseMove(TObject *Sender, TShiftState Shift, int X, int Y)
    {
    mouse.pick(X,Y);
    }
//---------------------------------------------------------------------------
void __fastcall TForm1::ck_depthClick(TObject *Sender)
    {
    gl_draw();
    }
//---------------------------------------------------------------------------

Here preview of from left RGB,Depth,Stencil:

preview

Here captured GIF:

GIF preview

the first 3 numbers are the 3D position of picked pixel in [GCS] and the last number in caption is the picked ID where 0 means no object.

The example is using gl_simple,h from here:

  • simple complete GL+VAO/VBO+GLSL+shaders example in C++

You can ignore the VCL stuff as its not important just port the events to your environment…

So what to do:

  1. rendering

    You need add stencil buffer to your GL window pixel format so in my case I just add:

     pfd.cStencilBits = 16;
    

    into gl_init() function from gl_simple.h. Also add its bit into glClear and set each objects stencil to its ID Like I did in gl_draw().

  2. picking

    I wrote a small glMouse class that do all the heavy lifting. On each change of perspective, view, or viewport call its glMouse::resize function. That will prepare all the constants needed for the computations later. Beware it needs direct camera/view matrix !!!

    Now on each mouse movement (or click or whatever) call the glMouse::pick function and then use the results like id which will return the ID picked object was rendered with or pos which is the 3D coordinate in global world coordinates ([GCS]) of the ray object intersection.

    The function just read the depth and stencil buffers. Linearize depth like here:

    • depth buffer got by glReadPixels is always 1

    and compute the ray beg,dir,pos,depth in [GCS].

  3. Normal

    You got 2 options either render your normal as another buffer which is the simplest and most precise. Or read depths of 2 or more neighboring pixels around picked one compute their 3D positions. From that using cross product compute you normal(s) and average if needed. But this can lead to artifacts on edges.

As mentioned in the comments to boost accuracy you should use linear depth buffer instead of linearized logarithmic like this:

  • Linear depth buffer

Btw I used the same technique in here (in GDI based SW isometric render):

  • Improving performance of click detection on a staggered column isometric grid

[Edit1] 8bit stencil buffer

Well these days the reliable stencil bitwidth is only 8bit which limits the number of ids to 255. That is in most cases not enough. A workaround is to render the indexes as colors then store the frame into CPU memory and then render colors normaly. Then when needed using the stored frame for picking. Rendering to texture or color attachment is also a possibility.

[Edit2] some related links

  • objects moving with mouse
  • objects moving and orienting with mouse

Leave a Comment

tech