hostsupport/hostopenvg/src/src/riRasterizer.cpp
branchbug235_bringup_0
changeset 54 067180f57b12
parent 53 c2ef9095503a
child 55 09263774e342
--- a/hostsupport/hostopenvg/src/src/riRasterizer.cpp	Wed Oct 06 17:59:01 2010 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1073 +0,0 @@
-/*------------------------------------------------------------------------
- *
- * OpenVG 1.1 Reference Implementation
- * -----------------------------------
- *
- * Copyright (c) 2007 The Khronos Group Inc.
- * Portions copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and /or associated documentation files
- * (the "Materials "), to deal in the Materials without restriction,
- * including without limitation the rights to use, copy, modify, merge,
- * publish, distribute, sublicense, and/or sell copies of the Materials,
- * and to permit persons to whom the Materials are furnished to do so,
- * subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Materials.
- *
- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR
- * THE USE OR OTHER DEALINGS IN THE MATERIALS.
- *
- *//**
- * \file
- * \brief	Implementation of polygon rasterizer.
- * \note
- *//*-------------------------------------------------------------------*/
-
-#include "riRasterizer.h"
-
-// TEMP!
-#ifndef __SFCOMPILER_H
-#   include "sfCompiler.h"
-#endif
-
-
-namespace OpenVGRI
-{
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Rasterizer constructor.
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-
-Rasterizer::Rasterizer() :
-    m_covBuffer(NULL),
-    m_covBufferSz(0),
-    m_edges(),
-    m_scissorEdges(),
-    m_scissor(false),
-    m_aa(true),
-    m_vpx(0),
-    m_vpy(0),
-    m_vpwidth(0),
-    m_vpheight(0),
-    m_fillRule(VG_EVEN_ODD),
-    m_pixelPipe(NULL),
-    m_nSpans(0)
-{}
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Rasterizer destructor.
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-
-Rasterizer::~Rasterizer()
-{
-    if(m_covBuffer)
-        RI_DELETE_ARRAY(m_covBuffer);
-}
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Removes all appended edges.
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-
-#define EDGE_TERMINATOR 0xFFFFFFFFu
-
-void Rasterizer::clear()
-{
-    //m_edges.clear();
-    for (int i = 0; i < m_edges.size(); i++)
-        m_edges[i] = EDGE_TERMINATOR;
-
-    m_edgePool.clear();
-
-    m_edgeMin.set(0x7fffffffu, 0x7fffffffu);
-    m_edgeMax.set(0x80000000, 0x80000000);
-}
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Appends an edge to the rasterizer.
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-
-void Rasterizer::addBBox(const IVector2& v)
-{
-    if(v.x < m_edgeMin.x) m_edgeMin.x = v.x;
-    if(v.y < m_edgeMin.y) m_edgeMin.y = v.y;
-    if(v.x > m_edgeMax.x) m_edgeMax.x = v.x;
-    if(v.y > m_edgeMax.y) m_edgeMax.y = v.y;
-}
-
-void Rasterizer::pushEdge(const Edge& edge)
-{
-    addBBox(edge.v0);
-    addBBox(edge.v1);
-
-    // Only add processed edges.
-
-    RI_ASSERT(edge.v0.y >= 0);
-    RI_ASSERT(edge.v0.y < edge.v1.y);	//horizontal edges should have been dropped already
-
-    ActiveEdge ae;
-    ae.direction = edge.direction;
-
-    // \todo Adjust for non-AA cases
-    // \todo verySteep is temporary. Either clip to right edge also, or validate that a proper slope can be
-    // calculated here.
-    const int slope	= RI_SAT_SHL((edge.v1.x - edge.v0.x), RASTERIZER_BITS - X_BITS) / (edge.v1.y - edge.v0.y);
-    //const bool verySteep = RI_INT_ABS(edge.v1.x - edge.v0.x) > (1 << (30-RASTERIZER_BITS)) ? true : false;
-    //const int slope	= verySteep ? 1 << 30 : RI_SHL((edge.v1.x - edge.v0.x), RASTERIZER_BITS - X_BITS) / (edge.v1.y - edge.v0.y);
-    // slope: SI.(RASTERIZER_BITS - Y_BITS)
-    const int yF 	= edge.v0.y & Y_MASK;
-    // \todo See verySteep note for this hack also. (Clip to right edge?)
-    const int xRef 	= RI_SAT_SHL(edge.v0.x, RASTERIZER_BITS - X_BITS) - (yF * slope);
-    //const int xRef 	= edge.v0.x > (1<<(30-RASTERIZER_BITS)) ? 1<<30 : RI_SHL(edge.v0.x, RASTERIZER_BITS - X_BITS) - (yF * slope);
-
-    RI_ASSERT(RI_INT_ABS(edge.v0.y <= 32767));
-    RI_ASSERT(RI_INT_ABS(edge.v1.y <= 32767));
-
-    ae.yStart 		= (RIint16)edge.v0.y;
-    ae.yEnd 	  	= (RIint16)edge.v1.y;
-    ae.xRef 		= xRef;
-    ae.slope 		= slope;
-    // Scanline range.
-    ae.minx         = xRef >> RASTERIZER_BITS;
-    ae.maxx         = (xRef + slope * (1<<Y_BITS)) >> RASTERIZER_BITS;
-
-    if (ae.minx > ae.maxx)
-        RI_ANY_SWAP(ActiveEdge::XCoord, ae.minx, ae.maxx);
-
-    if (ae.maxx < 0)
-        ae.minx = ae.maxx = LEFT_DISCARD_SHORT;
-
-    if (m_edges[ae.yStart>>Y_BITS] == EDGE_TERMINATOR)
-        ae.next = EDGE_TERMINATOR;
-    else
-        ae.next = m_edges[ae.yStart>>Y_BITS];
-
-    m_edgePool.push_back(ae);	//throws bad_alloc
-
-    RI_ASSERT(m_edgePool.size() > 0);
-    m_edges[ae.yStart>>Y_BITS] = m_edgePool.size()-1;
-}
-
-/**
- * \brief   Clips an edge and if something remains, adds it to the list of edges.
- * \todo    Enhance precision: Currently this just uses doubles and gets away with
- *          it in most cases.
- */
-void Rasterizer::clipAndAddEdge(Edge& edge)
-{
-    //if (m_edges.size() > 48)
-        //return;
-    // Check y-clips
-    // \todo Reduce amount of clips.
-    bool outLeft[2] = {(edge.v0.x < m_vpMinx), (edge.v1.x < m_vpMinx)};
-    bool outRight[2] = {(edge.v0.x > m_vpMaxx), (edge.v1.x > m_vpMaxx)};
-    bool outTop[2] = {(edge.v0.y < m_vpMiny), (edge.v1.y < m_vpMiny)};
-    bool outBottom[2] = {(edge.v0.y > m_vpMaxy), (edge.v1.y > m_vpMaxy)};
-
-    if (!(outLeft[0] || outLeft[1] || outRight[0] || outRight[1] || outTop[0] || outTop[1] || outBottom[0] || outBottom[1]))
-    {
-        pushEdge(edge);
-        return;
-    }
-
-    // \todo Make sure that checking out-of-right works with the scanconverter.
-    if ((outBottom[0] && outBottom[1]) || (outTop[0] && outTop[1]))
-        return; // Out of bounds
-
-    // \todo Clip to right edge of screen.
-    // \todo Make slope-calculation and signs consistent.
-    //
-    if (outTop[0] || outBottom[1])
-    {
-        // Clip to top/bottom.
-        double slope = (double)(edge.v1.x - edge.v0.x)/(edge.v1.y - edge.v0.y);
-
-        if (outTop[0])
-        {
-            RI_ASSERT(-(RIint64)edge.v0.y >= 0);
-            RIint32 dx = RI_ROUND_TO_INT(-slope * edge.v0.y);
-            edge.v0.y = 0;
-            edge.v0.x += dx;
-        }
-
-        if (outBottom[1])
-        {
-            RIint32 dy = edge.v1.y - m_vpMaxy;
-            RI_ASSERT(dy >= 0);
-            RIint32 dx = -RI_ROUND_TO_INT(slope * dy);
-            edge.v1.y = m_vpMaxy;
-            edge.v1.x += dx;
-        }
-
-    }
-
-    if (edge.v0.y >= edge.v1.y)
-        return;
-
-    // \todo Recheck left/right.
-    outLeft[0] = (edge.v0.x < m_vpMinx); outLeft[1] = (edge.v1.x < m_vpMinx);
-    outRight[1] = (edge.v0.x > m_vpMaxx); outRight[1] = (edge.v1.x > m_vpMaxx);
-
-    if (outLeft[0] && outLeft[1])
-    {
-        edge.v0.x = m_vpMinx;
-        edge.v1.x = m_vpMinx;
-        pushEdge(edge);
-        return;
-    }
-    if (outRight[0] && outRight[1])
-    {
-        edge.v0.x = m_vpMaxx;
-        edge.v1.x = m_vpMaxx;
-        pushEdge(edge);
-        return;
-    }
-
-    // From outside -> screen
-    if (outLeft[0] || outRight[1])
-    {
-        // infinite slope?
-        double slope = (double)((RIint64)edge.v1.y - edge.v0.y)/((RIint64)edge.v1.x - edge.v0.x);
-
-        if (outLeft[0])
-        {
-            RIint32 dx = edge.v0.x;
-            //RI_ASSERT(dx >= 0);
-            // Note the sign.
-            RIint32 dy = RI_ROUND_TO_INT(-slope * dx);
-
-            Edge vpart = edge;
-            vpart.v1.y = edge.v0.y + dy;
-            //vpart.v1.x = edge.v0.x; // = 0?
-            // \note This should be flagged instead of setting the smallest possible
-            // value because of extremely gentle slopes may cause bugs:
-            vpart.v1.x = vpart.v0.x = -0x100000;
-
-            if (vpart.v1.y > vpart.v0.y)
-                pushEdge(vpart);
-
-            edge.v0.y += dy;
-            edge.v0.x = 0;
-        }
-    }
-    // From screen -> outside
-    if (outLeft[1] || outRight[0])
-    {
-        // infinite slope?
-        double slope = (double)((RIint64)edge.v1.y - edge.v0.y)/((RIint64)edge.v1.x - edge.v0.x);
-
-        if (outLeft[1])
-        {
-            RIint32 dx = edge.v0.x;
-            RI_ASSERT(dx >= 0);
-            RIint32 dy = RI_ROUND_TO_INT(-slope * dx);
-
-            Edge vpart = edge;
-            vpart.v0.y = edge.v0.y + dy;
-            vpart.v1.x = vpart.v0.x = LEFT_DISCARD;
-
-            if (vpart.v1.y > vpart.v0.y)
-                pushEdge(vpart);
-
-            edge.v1.y = edge.v0.y + dy;
-            edge.v1.x = 0;
-        }
-    }
-
-    if (edge.v0.y >= edge.v1.y)
-        return;
-
-    // Finally, add the edge:
-    pushEdge(edge);
-}
-
-void Rasterizer::addEdge(const Vector2& v0, const Vector2& v1)
-{
-    if( m_edges.size() >= RI_MAX_EDGES )
-        throw std::bad_alloc();	//throw an out of memory error if there are too many edges
-
-    Edge e;
-
-    {
-        IVector2 i0(RI_ROUND_TO_INT(v0.x * (1<<X_BITS)), RI_ROUND_TO_INT(v0.y * (1<<Y_BITS)));
-        IVector2 i1(RI_ROUND_TO_INT(v1.x * (1<<X_BITS)), RI_ROUND_TO_INT(v1.y * (1<<Y_BITS)));
-
-        if(i0.y == i1.y)
-            return;	//skip horizontal edges (they don't affect rasterization since we scan horizontally)
-
-        if (i0.y < i1.y)
-        {
-            // Edge is going upward
-            e.v0 = i0;
-            e.v1 = i1;
-            e.direction = 1;
-        }
-        else
-        {
-            // Edge is going downward
-            e.v0 = i1;
-            e.v1 = i0;
-            e.direction = -1;
-        }
-    }
-
-    // Clip and insert.
-
-    clipAndAddEdge(e);
-}
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Set up rasterizer
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-
-void Rasterizer::setup(int vpx, int vpy, int vpwidth, int vpheight, VGFillRule fillRule, const PixelPipe* pixelPipe)
-{
-    RI_ASSERT(vpwidth >= 0 && vpheight >= 0);
-    RI_ASSERT(vpx + vpwidth >= vpx && vpy + vpheight >= vpy);
-    RI_ASSERT(fillRule == VG_EVEN_ODD || fillRule == VG_NON_ZERO);
-    RI_ASSERT(pixelPipe);
-
-    clear();
-
-    m_vpx = vpx;
-    m_vpy = vpy;
-    m_vpwidth = vpwidth;
-    m_vpheight = vpheight;
-
-    if (m_vpheight > m_edges.size())
-    {
-        int os = m_edges.size();
-        m_edges.resize(m_vpheight);
-        for (int i = os; i < m_edges.size(); i++)
-            m_edges[i] = EDGE_TERMINATOR;
-    }
-
-    m_vpMinx = RI_SHL(vpx, X_BITS);
-    m_vpMiny = RI_SHL(vpy, Y_BITS);
-    m_vpMaxx = RI_SHL(vpx + vpwidth, X_BITS);
-    m_vpMaxy = RI_SHL(vpy + vpheight, Y_BITS);
-
-    m_fillRule = fillRule;
-
-    RIuint32 fillRuleMask = fillRule == VG_NON_ZERO ? 0xffffffffu : 1;
-    m_fillRuleMask = fillRuleMask;
-
-    m_pixelPipe = pixelPipe;
-    m_covMinx = vpx+vpwidth;
-    m_covMiny = vpy+vpheight;
-    m_covMaxx = vpx;
-    m_covMaxy = vpy;
-}
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Sets scissor rectangles.
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-
-void Rasterizer::setScissor(const Array<Rectangle>& scissors)
-{
-    try
-    {
-        m_scissorEdges.clear();
-        for(int i=0;i<scissors.size();i++)
-        {
-            if(scissors[i].width > 0 && scissors[i].height > 0)
-            {
-                ScissorEdge e;
-                e.miny = scissors[i].y;
-                e.maxy = RI_INT_ADDSATURATE(scissors[i].y, scissors[i].height);
-
-                e.x = scissors[i].x;
-                e.direction = 1;
-                m_scissorEdges.push_back(e);	//throws bad_alloc
-                e.x = RI_INT_ADDSATURATE(scissors[i].x, scissors[i].width);
-                e.direction = -1;
-                m_scissorEdges.push_back(e);	//throws bad_alloc
-            }
-        }
-    }
-    catch(std::bad_alloc)
-    {
-        m_scissorEdges.clear();
-        throw;
-    }
-}
-
-void Rasterizer::setScissoring(bool enabled)
-{
-    m_scissor = enabled;
-}
-
-static RI_INLINE void small_memcpy32(void* dst, const void* src, size_t n)
-{
-    RIuint32 *d = (RIuint32*)dst;
-    const RIuint32 *s = (const RIuint32*)src;
-    while(n)
-    {
-        *d++ = *s++;
-        n-=4;
-    }
-}
-
-// \todo Move this to some debug file or remove.
-#if defined(USE_SSE2) && !defined(_WIN32)
-RI_INLINE static void print128(__m128i ll)
-{
-#if defined(RI_DEBUG)
-    unsigned long long v[2];
-    _mm_storeu_pd((double*)v, (__m128d)ll);
-    RI_PRINTF("0x%016llx %016llx\n", v[0], v[1]);
-#else
-    (void)ll;
-#endif
-}
-#endif
-
-#if defined(USE_SSE2)
-RI_INLINE static __m128i mm_mul4x32(const __m128i a, const __m128i b) {
-     __m128i res;
-#if (_MSC_VER > 1400 )
-     // \todo Simpler way to do this on intel?
-     __m128i m0 = _mm_mul_epu32(a, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 1, 0, 0)));
-     __m128i m1 = _mm_mul_epu32(a, _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 2, 2)));
-
-     res = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(m0), _mm_castsi128_ps(m1), _MM_SHUFFLE(2, 0, 2, 0)));
-#else
-     __asm {
-         movdqa xmm1, a;
-         movdqa xmm2, b;
-         pshufd xmm3, xmm2, 80;
-         movdqa xmm0, xmm1;
-
-         pshufd xmm2, xmm2, 250;
-         pmuludq xmm0, xmm3;
-         pmuludq xmm1, xmm2;
-
-         shufps xmm0, xmm1, 136;
-         movdqa res, xmm0;
-     }
-#endif
-     return res;
-}
-#endif
-
-#if defined(USE_SSE2)
-RI_INLINE static void mm_get_xmasks(const __m128i& coords, const __m128i& sampleCoords, __m128i& slWindMask, __m128i& pxWindMask)
-{
-    const __m128i z = _mm_setzero_si128();
-    const __m128i xMask = _mm_cmpeq_epi16(_mm_srai_epi16(coords, Rasterizer::RASTERIZER_BITS), z);
-    const __m128i sCmp = _mm_or_si128(_mm_cmpgt_epi16(sampleCoords, coords), _mm_cmpeq_epi16(sampleCoords, coords));
-    //const __m128i sCmp = _mm_cmplt_epi16(coords, sampleCoords);
-    slWindMask = xMask;
-    pxWindMask = _mm_and_si128(xMask, sCmp);
-}
-#endif
-
-RI_INLINE static void getVerticalSubpixels(int iY, int yStart, int yEnd, int& py0, int& py1)
-{
-    const int cy = iY << Rasterizer::Y_BITS;
-    py0 = cy > yStart ? 0 : yStart & Rasterizer::Y_MASK;
-    py1 = (RI_INT_MIN(yEnd, cy + (1<<Rasterizer::Y_BITS)) - 1) & Rasterizer::Y_MASK;
-}
-
-RI_INLINE static void applyLeftEdge(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& scanline, int intY)
-{
-    // Applies the whole edge at a time. Make sure xRight < x for all y.
-    // \todo Remove duplicate code for determining the active samples
-#if defined(USE_SSE2)
-    int py0, py1;
-
-    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, py0, py1);
-
-    const __m128i csteps = _mm_set_epi16(7,6,5,4,3,2,1,0);
-
-    const __m128i ssePy0 = _mm_set1_epi16(py0-1);
-    const __m128i ssePy1 = _mm_set1_epi16(py1+1);
-
-    const __m128i yMask = _mm_and_si128(_mm_cmpgt_epi16(csteps, ssePy0), _mm_cmplt_epi16(csteps, ssePy1));
-    const __m128i dir = _mm_set1_epi16(currAe.direction);
-
-    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, _mm_and_si128(yMask, dir));
-
-#else
-    RI_ASSERT(false); // Not implemented yet.
-#endif
-}
-
-RI_INLINE static void applyLeftEdgeNoAA(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& scanline, int intY)
-{
-    // Applies the whole edge at a time. Make sure xRight < x for all y.
-    // \todo Remove duplicate code for determining the active samples?
-#if defined(USE_SSE2)
-    int py0, py1;
-
-    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, py0, py1);
-
-    //const __m128i csteps = _mm_set_epi16(4,4,4,4,4,4,4,4);
-
-    __m128i yMask;
-
-    if (py0 <= 4 && py1 >= 4)
-        yMask = _mm_set1_epi8(-1);
-    else
-        yMask = _mm_set1_epi8(0);
-
-    const __m128i dir = _mm_set1_epi16(currAe.direction);
-
-    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, _mm_and_si128(yMask, dir));
-    //scanline.sseWinding = _mm_add_epi32(scanline.sseWinding, dir);
-
-#else
-    RI_ASSERT(false); // Not implemented yet.
-#endif
-}
-
-RI_INLINE void calculateAEWinding(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& pixel, Rasterizer::Windings& scanline, int intY, int pixelX)
-{
-#define QUEEN_COORD(Y) ((Y<<(Rasterizer::RASTERIZER_BITS - Rasterizer::SAMPLE_BITS)) + (1<<(Rasterizer::RASTERIZER_BITS-Rasterizer::SAMPLE_BITS-1)))
-
-#if !defined(USE_SSE2)
-    static const int queenCoords[(1<<Rasterizer::SAMPLE_BITS)] = {
-        QUEEN_COORD(3), QUEEN_COORD(7), QUEEN_COORD(0), QUEEN_COORD(2),
-        QUEEN_COORD(5), QUEEN_COORD(1), QUEEN_COORD(6), QUEEN_COORD(4)
-    };
-
-    const int ix = pixelX >> Rasterizer::RASTERIZER_BITS;
-    const int cy = intY << Rasterizer::Y_BITS;
-
-    const int py0 = cy > currAe.yStart ? 0 : currAe.yStart & Rasterizer::Y_MASK;
-    const int py1 = (RI_INT_MIN(currAe.yEnd, cy + (1<<Rasterizer::Y_BITS)) - 1) & Rasterizer::Y_MASK;
-
-    int edgeX = currAe.xRef + (cy + py0 - (currAe.yStart & ~Rasterizer::Y_MASK)) * currAe.slope;
-
-    RI_ASSERT(py1 >= py0);
-
-    for (int s = py0; s <= py1; s++)
-    {
-        const int sampleX = pixelX + queenCoords[s];
-
-        //compute winding number by evaluating the edge functions of edges to the left of the sampling point
-        if(((edgeX >> Rasterizer::RASTERIZER_BITS) == ix))
-        {
-            if (sampleX >= edgeX)
-            {
-                pixel.winding[s] += currAe.direction;
-            }
-            scanline.winding[s] += currAe.direction;
-        }
-
-        edgeX += currAe.slope;
-    }
-#else
-
-    __m128i qCoords = _mm_set_epi16(
-        QUEEN_COORD(4), QUEEN_COORD(6), QUEEN_COORD(1), QUEEN_COORD(5),
-        QUEEN_COORD(2), QUEEN_COORD(0), QUEEN_COORD(7), QUEEN_COORD(3));
-
-    RI_ASSERT(Rasterizer::RASTERIZER_BITS <= 14);
-
-    // TEROP: Optimize conditions.
-    int py0, py1;
-    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, py0, py1);
-
-    const int cy = intY << Rasterizer::Y_BITS;
-
-    const __m128i csteps0 = _mm_set_epi32(3,2,1,0);
-    const __m128i csteps1 = _mm_set_epi32(7,6,5,4);
-
-    const __m128i ssePy0 = _mm_set1_epi32(py0-1);
-    const __m128i ssePy1 = _mm_set1_epi32(py1+1);
-
-    const __m128i yMask0 = _mm_and_si128(_mm_cmpgt_epi32(csteps0, ssePy0), _mm_cmplt_epi32(csteps0, ssePy1));
-    const __m128i yMask1 = _mm_and_si128(_mm_cmpgt_epi32(csteps1, ssePy0), _mm_cmplt_epi32(csteps1, ssePy1));
-
-    const int edgeX = currAe.xRef + (cy - (currAe.yStart & ~Rasterizer::Y_MASK)) * currAe.slope;
-    const __m128i xStart = _mm_set1_epi32(edgeX - pixelX);
-
-    const __m128i xs0 = _mm_set1_epi32(currAe.slope);
-
-    __m128i xAdd0 = mm_mul4x32(xs0, csteps0);
-    __m128i xAdd1 = mm_mul4x32(xs0, csteps1);
-    __m128i coords0 = _mm_add_epi32(xStart, xAdd0);
-    __m128i coords1 = _mm_add_epi32(xStart, xAdd1);
-    __m128i coords = _mm_packs_epi32(coords0, coords1);
-
-    __m128i dir = _mm_set1_epi16(currAe.direction);
-    __m128i yMask = _mm_packs_epi32(yMask0, yMask1);
-    __m128i mDir = _mm_and_si128(dir, yMask);
-
-    __m128i sampleCoords = qCoords;
-
-    __m128i sw, pw;
-    mm_get_xmasks(coords, sampleCoords, sw, pw);
-
-    pixel.sseWinding = _mm_add_epi16(pixel.sseWinding, _mm_and_si128(pw, mDir));
-    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, _mm_and_si128(sw, mDir));
-#endif
-
-#undef QUEEN_COORD
-
-}
-
-/**
- * \brief   Calculate winding using one sample only.
- * \note    This uses most of the same code as the AA-case even though it is not
- *          necessary (one sample would be enough).
- */
-RI_INLINE void calculateAEWindingNoAA(const Rasterizer::ActiveEdge& currAe, Rasterizer::Windings& pixel, Rasterizer::Windings& scanline, int intY, int pixelX)
-{
-#if defined(USE_SSE2)
-
-#define QUEEN_COORD(Y) ((Y<<(Rasterizer::RASTERIZER_BITS - Rasterizer::SAMPLE_BITS)) + (1<<(Rasterizer::RASTERIZER_BITS-Rasterizer::SAMPLE_BITS-1)))
-    const int half = 1<<(Rasterizer::RASTERIZER_BITS-1);
-
-    __m128i sampleCoords = _mm_set1_epi16(half);
-
-    RI_ASSERT(Rasterizer::RASTERIZER_BITS <= 14);
-
-    const int cy = intY << Rasterizer::Y_BITS;
-
-    int py0, py1;
-    getVerticalSubpixels(intY, currAe.yStart, currAe.yEnd, py0, py1);
-
-    __m128i yMask;
-
-    if (py0 <= 4 && py1 >= 4)
-        yMask = _mm_set1_epi8(-1);
-    else
-        yMask = _mm_set1_epi8(0);
-
-    const __m128i csteps0 = _mm_set_epi32(4,4,4,4);
-    const __m128i csteps1 = _mm_set_epi32(4,4,4,4);
-
-    const int edgeX = currAe.xRef + (cy - (currAe.yStart & ~Rasterizer::Y_MASK)) * currAe.slope;
-    const __m128i xStart = _mm_set1_epi32(edgeX - pixelX);
-
-    const __m128i xs0 = _mm_set1_epi32(currAe.slope);
-
-    __m128i xAdd0 = mm_mul4x32(xs0, csteps0);
-    __m128i xAdd1 = mm_mul4x32(xs0, csteps1);
-    __m128i coords0 = _mm_add_epi32(xStart, xAdd0);
-    __m128i coords1 = _mm_add_epi32(xStart, xAdd1);
-    __m128i coords = _mm_packs_epi32(coords0, coords1);
-
-    __m128i dir = _mm_set1_epi16(currAe.direction);
-    __m128i mDir = _mm_and_si128(dir, yMask);
-    //__m128i mDir = dir;
-
-    __m128i sw, pw;
-    mm_get_xmasks(coords, sampleCoords, sw, pw);
-
-    pixel.sseWinding = _mm_add_epi16(pixel.sseWinding, _mm_and_si128(pw, mDir));
-    scanline.sseWinding = _mm_add_epi16(scanline.sseWinding, _mm_and_si128(sw, mDir));
-
-#undef QUEEN_COORD
-
-#else
-    RI_ASSERT(false); // Not implemented.
-#endif
-}
-
-#if defined(USE_SSE2)
-RI_INLINE static int mm_winding_to_coverage(const Rasterizer::Windings& pixel, int fillRuleMask)
-{
-    // This version uses SSE2 counters.
-    __m128i mask = _mm_set1_epi16(fillRuleMask);
-    __m128i t = _mm_and_si128(mask, pixel.sseWinding);
-    __m128i z = _mm_setzero_si128();
-    __m128i isz = _mm_cmpeq_epi16(t, z);
-    __m128i ones = _mm_set1_epi16(1);
-    __m128i res = _mm_add_epi16(ones, isz);
-    __m128i add0 = _mm_add_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(2, 3, 2, 3)));
-    __m128i add1 = _mm_add_epi16(add0, _mm_shuffle_epi32(add0, _MM_SHUFFLE(1, 1, 1, 1)));
-    __m128i add2 = _mm_add_epi16(add1, _mm_shufflelo_epi16(add1, _MM_SHUFFLE(1, 1, 1, 1)));
-
-    int nSamples = _mm_cvtsi128_si32(add2) & 0xff;
-    return nSamples;
-}
-#endif
-
-#define RI_DEBUG
-#if defined(RI_DEBUG)
-void maybeDumpEdges(Array<Rasterizer::ActiveEdge> &edgePool)
-{
-    return;
-    // \note This gives an idea about the edges at the rasterization stage.
-    // Input edges must be output at a different stage.
-    RI_PRINTF("lines = []\n");
-    for (int i = 0 ; i < edgePool.size(); i++)
-    {
-        const int slope = edgePool[i].slope;
-        int x0, x1, y0, y1;
-        y0 = edgePool[i].yStart;
-        y1 = edgePool[i].yEnd;
-        x0 = edgePool[i].xRef + (slope * (y0 & Rasterizer::Y_MASK));
-        x1 = (edgePool[i].xRef + (slope * (y1 - (y0 & ~Rasterizer::Y_MASK))))>>(Rasterizer::RASTERIZER_BITS-Rasterizer::X_BITS);
-        RI_PRINTF("lines += [[%d, %d], [%d, %d]]\n",x0>>(Rasterizer::RASTERIZER_BITS-Rasterizer::X_BITS),y0,x1,y1);
-    }
-}
-#endif
-
-/*-------------------------------------------------------------------*//*!
-* \brief	Calls PixelPipe::pixelPipe for each pixel with coverage greater
-*			than zero.
-* \param
-* \return
-* \note
-*//*-------------------------------------------------------------------*/
-void Rasterizer::fill()
-{
-    if(m_scissor && !m_scissorEdges.size())
-        return;	//scissoring is on, but there are no scissor rectangles => nothing is visible
-
-    int firstAe = 0;
-
-    //proceed scanline by scanline
-    //keep track of edges that can intersect the pixel filters of the current scanline (Active Edge Table)
-    //until all pixels of the scanline have been processed
-    //  for all sampling points of the current pixel
-    //    determine the winding number using edge functions
-    //    add filter weight to coverage
-    //  divide coverage by the number of samples
-    //  determine a run of pixels with constant coverage
-    //  call fill callback for each pixel of the run
-
-    const int fillRuleMask = m_fillRuleMask;
-
-    int bbminx = (m_edgeMin.x >> X_BITS);
-    int bbminy = (m_edgeMin.y >> Y_BITS);
-    int bbmaxx = (m_edgeMax.x >> X_BITS)+1;
-    int bbmaxy = (m_edgeMax.y >> Y_BITS)+1;
-    int sx = RI_INT_MAX(m_vpx, bbminx);
-    int ex = RI_INT_MIN(m_vpx+m_vpwidth, bbmaxx);
-    int sy = RI_INT_MAX(m_vpy, bbminy);
-    int ey = RI_INT_MIN(m_vpy+m_vpheight, bbmaxy);
-    if(sx < m_covMinx) m_covMinx = sx;
-    if(sy < m_covMiny) m_covMiny = sy;
-    if(ex > m_covMaxx) m_covMaxx = ex;
-    if(ey > m_covMaxy) m_covMaxy = ey;
-
-#if 0
-    // Dump edges:
-    static bool dump = true;
-    if (dump)
-    {
-        RI_PRINTF("lines = []\n");
-        for (int ie = 0; dump && ie < m_edgePool.size(); ie++)
-        {
-            RI_PRINTF("lines += [[%d, %d], [%d, %d]]\n",m_edgePool[ie].v0.x, m_edgePool[ie].v0.y, m_edgePool[ie].v1.x, m_edgePool[ie].v1.y);
-        }
-        dump = false;
-    }
-
-#endif
-    int debugMagic = 0;
-
-    m_aet.clear();
-
-#if defined(RI_DEBUG)
-    maybeDumpEdges(m_edgePool);
-#endif
-
-    //fill the screen
-    for(int j = sy; j < ey; j++)
-    {
-        Windings scanlineWinding;
-        const int cminy = j << Y_BITS;
-
-        if (m_scissor)
-        {
-            // Gather scissor edges intersecting this scanline
-            // \todo Don't clear, remove unused instead!
-            m_scissorAet.clear();
-
-            for(int e = 0; e < m_scissorEdges.size(); e++)
-            {
-                const ScissorEdge& se = m_scissorEdges[e];
-
-                if(j >= se.miny && j < se.maxy)
-                    m_scissorAet.push_back(m_scissorEdges[e]);	//throws bad_alloc
-            }
-
-            //sort scissor AET by edge x
-            if (m_scissor)
-                m_scissorAet.sort();
-        }
-
-        // Drop unused edges, update remaining.
-        // \todo Combine with full sweep. Use a sort-friendly edge-discard.
-        for (int iae = firstAe; iae < m_aet.size(); iae++)
-        {
-            ActiveEdge& ae = m_aet[iae];
-
-            if (cminy >= ae.yEnd)
-            {
-                m_aet[iae] = m_aet[firstAe];
-                firstAe++;
-                continue;
-            }
-
-            /* Update existing coordinates */
-            // \todo AND instead of shift. See other places also.
-            const int y0 = (ae.yStart & ~Y_MASK);
-            const int x = ae.xRef + ((j << Y_BITS) - y0) * ae.slope;
-            ae.minx = x >> RASTERIZER_BITS;
-            ae.maxx = (x + ae.slope * (1<<Y_BITS)) >> RASTERIZER_BITS;
-
-            if (ae.minx > ae.maxx)
-                RI_ANY_SWAP(ActiveEdge::XCoord, ae.minx, ae.maxx);
-
-            // If the edge is not visible, "mark" it as immediately applicable
-            // \todo Verify that this is the correct procedure.
-
-           if (ae.maxx < 0)
-               ae.minx = ae.maxx = LEFT_DISCARD_SHORT;
-       }
-
-        /* Add new edges */
-
-        RIuint32 aeIndex = m_edges[j];
-        while (aeIndex != EDGE_TERMINATOR)
-        {
-            const ActiveEdge& ae = m_edgePool[aeIndex];
-            m_aet.push_back(ae); // \todo Just copy pointers?
-            aeIndex = ae.next;
-        }
-
-        if (firstAe >= m_aet.size())
-        {
-            RI_ASSERT(firstAe == m_aet.size());
-            continue;	//no edges on the whole scanline, skip it
-        }
-
-        //sort AET by edge minx
-        m_aet.sort(firstAe, m_aet.size() - 1);
-
-        // \todo Optimize adding and updating the edges?
-        if (m_scissor && !m_scissorAet.size())
-            continue;	// Scissoring is on, but there are no scissor rectangles on this scanline.
-
-        //fill the scanline
-        int scissorWinding = m_scissor ? 0 : 1;	//if scissoring is off, winding is always 1
-        int scissorIndex = 0;
-        int aes = firstAe;
-        int aen = firstAe;
-
-        RI_ASSERT(sx >= 0);
-
-#if 1
-        if (m_aa)
-        {
-            while ((aen < m_aet.size()) && (m_aet[aen].maxx < 0))
-            {
-                applyLeftEdge(m_aet[aen], scanlineWinding, j);
-                aen++;
-            }
-        }
-        else
-        {
-            while ((aen < m_aet.size()) && (m_aet[aen].maxx < 0))
-            {
-                applyLeftEdgeNoAA(m_aet[aen], scanlineWinding, j);
-                aen++;
-            }
-        }
-
-#if defined(RI_DEBUG)
-        for (int a = aen; a < m_aet.size(); a++)
-        {
-            RI_ASSERT(m_aet[a].maxx >= 0);
-        }
-#endif
-#endif
-
-        // \todo Combine this with the first check or reorganize the "clipping".
-        if (aen >= m_aet.size())
-            continue; // No edges within viewport. Can happen atm. when all edges are "left".
-
-        for(int i = sx; i < ex;)
-        {
-            //find edges that intersect or are to the left of the pixel antialiasing filter
-            while(aes < m_aet.size() && (i + 1) >= m_aet[aes].minx)
-                aes++;
-            //edges [0,aes[ may have an effect on winding, and need to be evaluated while sampling
-
-            // RIint8 winding[SF_SAMPLES];
-            Windings pixelWinding;
-
-            pixelWinding = scanlineWinding;
-
-            if (m_aa)
-            {
-                for(int e = aen; e < aes; e++)
-                {
-                    const ActiveEdge& currAe = m_aet[e];
-                    calculateAEWinding(currAe, pixelWinding, scanlineWinding, j, i << RASTERIZER_BITS);
-                }
-            }
-            else
-            {
-                for(int e = aen; e < aes; e++)
-                {
-                    const ActiveEdge& currAe = m_aet[e];
-                    calculateAEWindingNoAA(currAe, pixelWinding, scanlineWinding, j, i << RASTERIZER_BITS);
-                }
-            }
-
-            //compute coverage
-            int coverageSamples = 0;
-#if !defined(USE_SSE2)
-
-            for (int s = 0; s < SF_SAMPLES; s++)
-            {
-                if(pixelWinding.winding[s])
-                {
-                    coverageSamples++;
-                }
-            }
-#else
-           coverageSamples = mm_winding_to_coverage(pixelWinding, fillRuleMask);
-            _mm_empty();
-#endif
-
-            //constant coverage optimization:
-            //scan AET from left to right and skip all the edges that are completely to the left of the pixel filter.
-            //since AET is sorted by minx, the edge we stop at is the leftmost of the edges we haven't passed yet.
-            //if that edge is to the right of this pixel, coverage is constant between this pixel and the start of the edge.
-            while(aen < m_aet.size() && m_aet[aen].maxx < i)
-                aen++;
-
-            int endSpan = m_vpx + m_vpwidth;	// endSpan is the first pixel NOT part of the span
-
-            if(aen < m_aet.size())
-            {
-                endSpan = RI_INT_MAX(i+1, RI_INT_MIN(endSpan, m_aet[aen].minx));
-            }
-
-            //fill a run of pixels with constant coverage
-            if(coverageSamples)
-            {
-
-                if (!m_scissor)
-                {
-                    int fillStartX = i;	/* Inclusive */
-                    pushSpan(fillStartX, j, (endSpan - fillStartX), coverageSamples);
-                }
-                else // (scissor)
-                {
-                    int fillStartX = i;
-                    //update scissor winding number
-
-                    /* \todo Sort the scissor edges and skip unnecessary checks when scissors are used */
-                    while (scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x <= fillStartX)
-                    {
-                        scissorWinding += m_scissorAet[scissorIndex++].direction;
-                    }
-
-                    while (!scissorWinding && scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x < endSpan)
-                    {
-                        fillStartX = m_scissorAet[scissorIndex].x;
-                        scissorWinding += m_scissorAet[scissorIndex++].direction;
-                        RI_ASSERT(fillStartX >= i);
-                    }
-
-                    RI_ASSERT(scissorWinding >= 0);
-
-                    int endScissorSpan = endSpan;
-
-                    while (scissorWinding && fillStartX < endSpan && (scissorIndex < m_scissorAet.size()))
-                    {
-
-                        // Determine the end of renderable area:
-                        while (scissorWinding && scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x <= endSpan)
-                        {
-                            endScissorSpan = m_scissorAet[scissorIndex].x;
-                            scissorWinding += m_scissorAet[scissorIndex++].direction;
-                        }
-
-                        RI_ASSERT(fillStartX >= i);
-                        RI_ASSERT(endScissorSpan <= endSpan);
-
-                        pushSpan(fillStartX, j, (endScissorSpan - fillStartX), coverageSamples);
-                        fillStartX = endScissorSpan;
-                        endScissorSpan = endSpan;
-
-                        // Skip until within drawable area
-                        while (!scissorWinding && scissorIndex < m_scissorAet.size() && m_scissorAet[scissorIndex].x < endSpan)
-                        {
-                            fillStartX = m_scissorAet[scissorIndex].x;
-                            scissorWinding += m_scissorAet[scissorIndex++].direction;
-                        }
-
-                    }
-                }
-            }
-            i = endSpan;
-        }
-    }
-    commitSpans();
-#if defined(USE_SSE2)
-    _mm_empty();
-#endif
-    clear();
-}
-
-RI_INLINE void Rasterizer::commitSpans()
-{
-    if (!m_nSpans)
-        return;
-
-    m_pixelPipe->fillSpans(m_ppVariants, m_spanCache, m_nSpans);
-    m_nSpans = 0;
-
-}
-
-RI_INLINE void Rasterizer::pushSpan(int x, int y, int len, int coverage)
-{
-    //printf("x: %d, y: %d, len: %d, coverage: %d\n", x, y, len, coverage);
-    // \todo Check what causes this with scissors
-    if (len <= 0) return;
-    //RI_ASSERT(len > 0);
-
-    Span& span = m_spanCache[m_nSpans];
-
-    span.x0 = x;
-    span.y = y;
-    span.len = (RIuint16)len;
-    span.coverage = coverage;
-
-    m_nSpans++;
-
-    if (m_nSpans == N_CACHED_SPANS)
-    {
-        commitSpans();
-    }
-}
-
-//=======================================================================
-
-}	//namespace OpenVGRI