RSS

Compute modulus division by (1 < s)="" –="" 1="" in="" parallel="" without="" a="" division="" operator="">

Fri, Mar 5, 2010

Algorithm

// The following is for a word size of 32 bits!
static const unsigned int M[] =
{
0×00000000, 0×55555555, 0×33333333, 0xc71c71c7, 
0×0f0f0f0f, 0xc1f07c1f, 0×3f03f03f, 0xf01fc07f,
0×00ff00ff, 0×07fc01ff, 0×3ff003ff, 0xffc007ff,
0xff000fff, 0xfc001fff, 0xf0003fff, 0xc0007fff,
0×0000ffff, 0×0001ffff, 0×0003ffff, 0×0007ffff,
0×000fffff, 0×001fffff, 0×003fffff, 0×007fffff,
0×00ffffff, 0×01ffffff, 0×03ffffff, 0×07ffffff,
0×0fffffff, 0×1fffffff, 0×3fffffff, 0×7fffffff
};
static const unsigned int Q[][6] =
{
{ 0,  0,  0,  0,  0,  0}, {16,  8,  4,  2,  1,  1}, {16,  8,  4,  2,  2,  2},
{15,  6,  3,  3,  3,  3}, {16,  8,  4,  4,  4,  4}, {15,  5,  5,  5,  5,  5},
{12,  6,  6,  6 , 6,  6}, {14,  7,  7,  7,  7,  7}, {16,  8,  8,  8,  8,  8},
{ 9,  9,  9,  9,  9,  9}, {10, 10, 10, 10, 10, 10}, {11, 11, 11, 11, 11, 11},
{12, 12, 12, 12, 12, 12}, {13, 13, 13, 13, 13, 13}, {14, 14, 14, 14, 14, 14},
{15, 15, 15, 15, 15, 15}, {16, 16, 16, 16, 16, 16}, {17, 17, 17, 17, 17, 17},
{18, 18, 18, 18, 18, 18}, {19, 19, 19, 19, 19, 19}, {20, 20, 20, 20, 20, 20},
{21, 21, 21, 21, 21, 21}, {22, 22, 22, 22, 22, 22}, {23, 23, 23, 23, 23, 23},
{24, 24, 24, 24, 24, 24}, {25, 25, 25, 25, 25, 25}, {26, 26, 26, 26, 26, 26},
{27, 27, 27, 27, 27, 27}, {28, 28, 28, 28, 28, 28}, {29, 29, 29, 29, 29, 29},
{30, 30, 30, 30, 30, 30}, {31, 31, 31, 31, 31, 31}
};
static const unsigned int R[][6] =
{
{0×00000000, 0×00000000, 0×00000000, 0×00000000, 0×00000000, 0×00000000},
{0×0000ffff, 0×000000ff, 0×0000000f, 0×00000003, 0×00000001, 0×00000001},
{0×0000ffff, 0×000000ff, 0×0000000f, 0×00000003, 0×00000003, 0×00000003},
{0×00007fff, 0×0000003f, 0×00000007, 0×00000007, 0×00000007, 0×00000007},
{0×0000ffff, 0×000000ff, 0×0000000f, 0×0000000f, 0×0000000f, 0×0000000f},
{0×00007fff, 0×0000001f, 0×0000001f, 0×0000001f, 0×0000001f, 0×0000001f},
{0×00000fff, 0×0000003f, 0×0000003f, 0×0000003f, 0×0000003f, 0×0000003f},
{0×00003fff, 0×0000007f, 0×0000007f, 0×0000007f, 0×0000007f, 0×0000007f},
{0×0000ffff, 0×000000ff, 0×000000ff, 0×000000ff, 0×000000ff, 0×000000ff},
{0×000001ff, 0×000001ff, 0×000001ff, 0×000001ff, 0×000001ff, 0×000001ff},
{0×000003ff, 0×000003ff, 0×000003ff, 0×000003ff, 0×000003ff, 0×000003ff},
{0×000007ff, 0×000007ff, 0×000007ff, 0×000007ff, 0×000007ff, 0×000007ff},
{0×00000fff, 0×00000fff, 0×00000fff, 0×00000fff, 0×00000fff, 0×00000fff},
{0×00001fff, 0×00001fff, 0×00001fff, 0×00001fff, 0×00001fff, 0×00001fff},
{0×00003fff, 0×00003fff, 0×00003fff, 0×00003fff, 0×00003fff, 0×00003fff},
{0×00007fff, 0×00007fff, 0×00007fff, 0×00007fff, 0×00007fff, 0×00007fff},
{0×0000ffff, 0×0000ffff, 0×0000ffff, 0×0000ffff, 0×0000ffff, 0×0000ffff},
{0×0001ffff, 0×0001ffff, 0×0001ffff, 0×0001ffff, 0×0001ffff, 0×0001ffff},
{0×0003ffff, 0×0003ffff, 0×0003ffff, 0×0003ffff, 0×0003ffff, 0×0003ffff},
{0×0007ffff, 0×0007ffff, 0×0007ffff, 0×0007ffff, 0×0007ffff, 0×0007ffff},
{0×000fffff, 0×000fffff, 0×000fffff, 0×000fffff, 0×000fffff, 0×000fffff},
{0×001fffff, 0×001fffff, 0×001fffff, 0×001fffff, 0×001fffff, 0×001fffff},
{0×003fffff, 0×003fffff, 0×003fffff, 0×003fffff, 0×003fffff, 0×003fffff},
{0×007fffff, 0×007fffff, 0×007fffff, 0×007fffff, 0×007fffff, 0×007fffff},
{0×00ffffff, 0×00ffffff, 0×00ffffff, 0×00ffffff, 0×00ffffff, 0×00ffffff},
{0×01ffffff, 0×01ffffff, 0×01ffffff, 0×01ffffff, 0×01ffffff, 0×01ffffff},
{0×03ffffff, 0×03ffffff, 0×03ffffff, 0×03ffffff, 0×03ffffff, 0×03ffffff},
{0×07ffffff, 0×07ffffff, 0×07ffffff, 0×07ffffff, 0×07ffffff, 0×07ffffff},
{0×0fffffff, 0×0fffffff, 0×0fffffff, 0×0fffffff, 0×0fffffff, 0×0fffffff},
{0×1fffffff, 0×1fffffff, 0×1fffffff, 0×1fffffff, 0×1fffffff, 0×1fffffff},
{0×3fffffff, 0×3fffffff, 0×3fffffff, 0×3fffffff, 0×3fffffff, 0×3fffffff},
{0×7fffffff, 0×7fffffff, 0×7fffffff, 0×7fffffff, 0×7fffffff, 0×7fffffff}
};

unsigned int n;       // numerator
const unsigned int s; // s > 0
const unsigned int d = (1 << s) - 1; // so d is either 1, 3, 7, 15, 31, …).
unsigned int m;       // n % d goes here.
m = (n & M[s]) + ((n & ~M[s]) >> s);
for (const unsigned int * q = &Q[s][0], * r = &R[s][0]; m > d; q++, r++)
{
m = (m >> *q) + (m & *r);
}
m = m == d ? 0 : m; // OR, less portably: m = m & -((signed)(m – d) >> s);
This method of finding modulus division by an integer that is one less than a power of 2 takes at most O(lg(N)) time, where N is the number of bits in the numerator (32 bits, for the code above). The number of operations is at most 12 + 9 * ceil(lg(N)). The tables may be removed if you know the denominator at compile time; just extract the few relevent entries and unroll the loop. It may be easily extended to more bits.
It finds the result by summing the values in base (1 << s) in parallel. First every other base (1 << s) value is added to the previous one. Imagine that the result is written on a piece of paper. Cut the paper in half, so that half the values are on each cut piece. Align the values and sum them onto a new piece of paper. Repeat by cutting this paper in half (which will be a quarter of the size of the previous one) and summing, until you cannot cut further. After performing lg(N/s/2) cuts, we cut no more; just continue to add the values and put the result onto a new piece of paper as before, while there are at least two s-bit values.

Sharing ~ Helping Other:
  • Print
  • email
  • Digg
  • del.icio.us
  • Facebook
  • Google Bookmarks
  • BlinkList
  • DZone
  • Slashdot
  • YahooMyWeb
  • StumbleUpon
  • Live
  • IndianPad
  • DotNetKicks
  • Technorati

Other Posts:

This post was written by:

eXclusiveMinds - who has written 500 posts on eXclusiveMinds.


Contact the author

Leave a Reply

You must be logged in to post a comment.