function varargout = PsychGPUTestAndTweakGammaTables(win, xoffset, deviceType, injectFault, varargin) % % PsychGPUTestAndTweakGammaTables - Test and tweak GPU hardware gamma tables. % % This function is a helper function used for high-precision display % devices like the Bits# from CRS and DataPixx/ViewPixx/ProPixx from VPixx, % which need identity passthrough of framebuffer pixel data to the video outputs % without any interference by the GPU and other intermediate encoder circuitry. % % It augments functions like LoadIdentityClut, trying to cope with GPUs that % are too broken to be fixable just by that function without any actual % measurement of video signals. % The function is called by toolboxes for special display output hardware % and makes use of / calls back into hardware specific functions of that % hardware to allow an iterative feedback loop of parameter tweaks and % measurements to execute in order to optimize gamma tables for optimal % pixel passthrough after other hardware-independent measures have failed. % % The routine is, e.g., used by the 'GPUEncoderTest' / 'CheckGPUSanity' % functions of the PsychDatapixx and BitsPlusPlus functions. % % Input parameters: (Mandatory) % 'win' = Onscreen window handle for onscreen window which displays to the % external high precision display device. % % 'deviceType' = Type of device: 0 = VPixx Inc. Data-/View-/Pro-Pixx. % 1 = CRS Bits# % % 'injectFault' = 1 = Intentionally setup a slightly faulty LUT to perturb % the signal and test the tweaking procedure. 0 = Don't. % % Returns 0 on success, 1 on failure. % % History: % 11/04/2013 mk Written. Consolidated from PsychDataPixx driver function % to share all the common code for VPixx and CRS products. % % 04/15/2014 mk Shift test strip 2 rows upward for robustness against % broken gfx-drivers. Remove try-catch hack for gamma table % loading on MS-Windows. Doesn't help, but obscure errors. global GL; %#ok if nargin < 4 error('Required minimum 3 parameters missing!'); end KbReleaseWait(-1); if deviceType == 0 % Open our own connection to Datapixx: devName = 'PsychDataPixx'; PsychDataPixx('Open'); % Repeat each measurement 60 times on DataPixx: numRescans = 60; end if deviceType == 1 devName = 'BitsPlusPlus'; % Retrieve IOPort handle to Bits# serial port connection: bitsSharpPort = varargin{1}; % Repeat each measurement 20 times on Bits# to compensate for its % slow readback: numRescans = 20; % Switch to Bits# status screen before pixel readback. Why? Pixel readback would % implicitely switch to status screen if it is not already active, but this % switch takes multiple seconds and causes instable operation of pixel readback, % ie., it delays readback and can cause timeouts in our readback code. % We prevent this by switching beforehand and giving it more than enough time to % stabilize: fprintf('%s: INFO: Switching Bits# into diagnostic mode. Will take about 5 seconds...\n', devName); IOPort('Write', bitsSharpPort, ['$statusScreen' char(13)]); WaitSecs('YieldSecs', 5); end % Set to 1 for fault injection to test the procedure: if injectFault fprintf('%s: INFO: Injecting perturbed gamma table to self-test my tweaking abilities...\n', devName); oldlut = Screen('ReadNormalizedGammaTable', win); Screen('LoadNormalizedGammaTable', win, oldlut * 0.95); %Screen('LoadNormalizedGammaTable', win, rand(size(oldlut))); end % Build 256 pixel level RGB test gradient: testdata = uint8(zeros(3, 256)); testdata(:,:) = uint8(repmat(0:255, 3, 1)); % Clear the onscreen window to black and set its clear color to black. Screen('FillRect', win, 0); % Wait at least two video refresh cycles to make sure the LUT's in the % GPU have stabilized: if deviceType == 0 % DataPixx: Wait for two vsync pulses: Datapixx('RegWrRdVideoSync'); Datapixx('RegWrRdVideoSync'); else % Other: Just wait for approximately 2 refresh durations: WaitSecs('YieldSecs', 2 * Screen('GetFlipInterval', win)); end % At this point the GPU gamma tables should be already loaded with an % identity LUT via LoadIdentityClut(). The GL struct and moglcore are % initialized so we can use low-level glXXX commands. % Testing loop: Will display the horizontal test gradient in the top % scanline of the display, then read it back from the device. If % everything works correctly then the readback gradient should match % the drawn gradient. A mismatch means that either the gamma tables are % not loaded with a proper identity LUT, or that some kind of % spatio-temporal display dithering is active and interfering. % % We can try to auto-correct for wrong identity LUT's with our iterative % correction loop. It will tweak the gamma tables until a match is % achieved. We can't do anything about dithering though beyond % reporting it: oldlut = Screen('ReadNormalizedGammaTable', win); nrlutslots = size(oldlut, 1); curlut = oldlut; psychlasterror('reset'); % We tweak gamma table elements in steps of 1/1024 units per iteration. % As the granularity of Digital DVI-D is only 8 bits or 1/256 units, % that means we are resolving 4 times more fine-grained than the device % resolution on DVI. This just for safety to avoid "jumping into the % wrong bin" due to roundoff errors in the OS/Driver. Could probably do % more coarse-grained, but better safe than sorry... stepsize = 1/1024; nIter = 100; failcount = 0; successcount = 0; regressioncount = 0; fluctcount = 0; retry = 1; % Draw test pattern: We don't "start" drawing the 10 scanlines high % test strip at y position 10, but already at 8, so technically the % two topmost lines will be outside the screen. This in case some % graphics drivers have off-by-one or off-by-two y position bugs, so % we are robust against that: glRasterPos2i(xoffset, 8); glDrawPixels(256, 10, GL.RGB, GL.UNSIGNED_BYTE, repmat(testdata, 1, 10)); % Show it at next retrace: Screen('Flip', win); % Wait generous 5 retrace cycles in case a compositor is delaying things: WaitSecs('YieldSecs', 5 * Screen('GetFlipInterval', win)); fprintf('%s: INFO: GPU gamma table and display encoder test running. Please be patient...\n', devName); tStart = GetSecs; % Enter test and auto-correction loop: while retry retry2=1; while retry2 && (fluctcount < nIter) if deviceType == 0 % Wait for next vsync pulse: Datapixx('RegWrRdVideoSync'); % Read back stabilized pixel data: oldrealpixels = Datapixx('GetVideoLine', 256); end if deviceType == 1 % Wait until at least next vsync: WaitSecs('YieldSecs', 1 * Screen('GetFlipInterval', win)); % Readback 2nd topmost scanline from Bits# : oldrealpixels = BitsPlusPlus('GetVideoLine', 256, 2); end % Repeat measurement until at least numRescans consecutive scans give % the same value: for i=1:numRescans if deviceType == 0 % Wait for next vsync pulse: Datapixx('RegWrRdVideoSync'); % Read back stabilized pixel data: realpixels = Datapixx('GetVideoLine', 256); end if deviceType == 1 % Wait until at least next vsync: WaitSecs('YieldSecs', 1 * Screen('GetFlipInterval', win)); % Readback 2nd topmost scanline from Bits# : realpixels = BitsPlusPlus('GetVideoLine', 256, 2); end if any(any(int16(oldrealpixels) - int16(realpixels))) % Difference between 1st measurement and i'th % measurement detected. Abort scan and retry: fluctcount = fluctcount + 1; clc; fprintf('%s: INFO: GPU gamma table and display encoder test running. Please be patient...\n', devName); fprintf('Instable DVI-D signal, possibly due to dithering! Will try to fix this by iterative tweaking of gamma tables.\n'); fprintf('Iteration Nr. %i in progress. Current pixelvalues received from topmost scanline:\n\n', failcount); disp(realpixels); break; else if i==numRescans retry2=0; end end end end if fluctcount >= nIter % Totally unstable DVI signal. This could be due to % (spatio-)temporal dithering being enabled on the display head % or due to serious signal integrity issues. fprintf('%s: INFO: No stable signal received during over %i iterations! Interference by dithering?!?\n', devName, nIter); fluctcount = 0; end % 'realpixels' contains a measurement that has been stable for at % least numRescans video cycles. We use this to judge the quality of the % current GPU LUT settings and perform corrections if needed. % Compute delta matrix to reference values: This should contain % all-zeros on properly working GPU's: deltavec = int16(testdata) - int16(realpixels); % Difference? if any(any(deltavec)) if failcount == 0 fprintf('%s: WARNING: Your GPU has a wrong identity gamma table loaded, possibly due to driver bugs.\n', devName); fprintf('%s: WARNING: I will try to automatically compensate for this now. Please standby...\n', devName); end failcount = failcount + 1; if successcount > 0 regressioncount = regressioncount + 1; fprintf('%s: WARNING: %i. regression detected after tweak %i! Previously successfull setting produces wrong values now\nafter %i successfull iterations!\n', devName, regressioncount, failcount, successcount); successcount = 0; end % Convert to double matrix and transpose to match LUT format. % Then convert to a corrective increment of stepsize units per % 1 pixel unit error: deltavec = transpose(double(deltavec)) * stepsize; % Build correction vector tweakvec: deltavec is a 256 rows by 3 % columns matrix of delta values to add to curlut for tweaking % it to a more correct value. curlut, as read from the GPU, % always has 3 columns for (Red, Green, Blue) just like % deltavec, but it can have more than 256 slots on high-end % GPU's with bigger gamma tables. E.g., the NVidia % QuadroFX-3800 when run under Linux with the binary blob % driver will have a 2^11 = 2048 slot lut. Therefore we need to % "stretch" deltavec vertically so it matches the size of the % 'curlut'. By design of the Datapixx/Bits# and DVI-D data % transmission, we can always only compute 256 rows of % meaningful tweaking information, therefore we simply repmat % replicate successive rows in deltavec to blow it up to a % matching tweakvec. This should be good enough to init the 256 % rows subset of curlut we actually care about with the tweaked % values we found: tweakvec = zeros(size(curlut)); chunksize = nrlutslots / 256; for tchunk = 0:255 tweakvec((1 + (tchunk * chunksize)):((tchunk * chunksize) + chunksize), :) = repmat(deltavec(1 + tchunk, :), chunksize, 1); end % Update gamma lut: curlut = curlut + tweakvec; mmcount = sum(sum(deltavec ~= 0)); fprintf('%s: INFO: Gamma table tweak iteration %i. Had %i mismatching pixels. Retesting with updated LUT...\n', devName, failcount, mmcount); % Clamp to valid 0-1 range: curlut(curlut > 1) = 1; curlut(curlut < 0) = 0; % Upload modified LUT to GPU: Screen('LoadNormalizedGammaTable', win, curlut); % Tried too long, too hard? if failcount > 255 || KbCheck(-1) % Running with non-zero rasterizer offset? if xoffset ~= 0 % Hmm. Could be a cascade of bugs. Reset xoffset to % zero and retry whole procedure just to be on the safe % side: fprintf('%s: WARNING: No success so far with rasterizeroffset %i. Retrying with zero-offset...\n', devName, xoffset); failcount = 0; curlut = oldlut; Screen('LoadNormalizedGammaTable', win, oldlut); % Draw test pattern: Screen('Flip', win); xoffset = 0; glRasterPos2i(xoffset, 10); glDrawPixels(256, 10, GL.RGB, GL.UNSIGNED_BYTE, repmat(testdata, 1, 10)); % Show it at next retrace: Screen('Flip', win); % Wait until at least next vsync: WaitSecs('YieldSecs', 1 * Screen('GetFlipInterval', win)); if ~KbCheck(-1) % Encore une fois... continue; end end % Ok, this can't be. Restore gamma table and give up: Screen('LoadNormalizedGammaTable', win, oldlut); if KbCheck(-1) fprintf('%s: USER ABORT VIA KEYPRESS.\n', devName); end % Fail: fprintf('%s: ERROR: Did not reach a stable result after %i tweak iterations.\n', devName, failcount); fprintf('%s: ERROR: This is hopeless, something is seriously wrong with your GPU. I give up!\n', devName); fprintf('%s: ERROR: One possible reason could be that (spatio-)temporal dithering is enabled for your\n', devName); fprintf('%s: ERROR: display due to some graphics driver bug, which prevents me from converging\n', devName); fprintf('%s: ERROR: to a stable result. Your high precision device will not be useable for high-precision\n', devName); fprintf('%s: ERROR: color or luminance displays with this broken graphics card/driver. Fix your system!\n', devName); if deviceType == 0 % Close our own connection to Datapixx: PsychDataPixx('Close'); end varargout{1} = 1; return; end else % No difference. Good. Let's repeat the exercise a few times to % rule out bad interference from temporal dithering: successcount = successcount + 1; fprintf('%s: INFO: Posttest iteration %i passed. Good! [Total elapsed time so far %f seconds.]\n', devName, successcount, GetSecs - tStart); if successcount >= 10 % 10 successfull consecutive repetitions. That's good % enough for us: retry = 0; end end end % Ok, we managed to get through: Screen('Flip', win); % How did we do? if (successcount >= 10) && (failcount == 0) % Purrfect! Return full success: fprintf('%s: INFO: Online test of GPU hardware identity clut and display encoder passed after a total of %f seconds.\n', devName, GetSecs - tStart); varargout{1} = 0; else % Got through, but with a non-zero failcount. That probably means % that display temporal dithering is not an issue, but that the % gamma table needed tweaking. fprintf('%s: INFO: Managed to find a useable gamma table setting after %i tweak iterations and %f seconds. :-)\n', devName, failcount, GetSecs - tStart); if regressioncount > 0 fprintf('%s: INFO: There were %i regressions throughout the procedure. Could be problems with active display dithering or bad signal quality?\n', devName, regressioncount); end if fluctcount == 0 fprintf('\n%s: INFO: There wasn''t any instability during the last testing cycle,\n', devName); fprintf('%s: INFO: so harmful display dithering may be absent now and you will be probably fine.\n', devName); end fprintf('%s: INFO: I will store this LUT on your filesystem for future use.\n', devName); if 0 fprintf('%s: INFO: The difference matrix between the original LUT and the corrected LUT looks like this:\n\n', devName); %#ok disp(curlut - oldlut); end fprintf('\n\n'); % Store the current gamma table LUT in a config file for later use % by LoadIdentityClut in successive runs: SaveIdentityClut(win); varargout{1} = 0; end if deviceType == 0 % Close our own connection to Datapixx: PsychDataPixx('Close'); end return; end