// RC4: implementation extracted from rpcrt4.dll.
// VMPC: slightly modified version of the reference source.

// Pack the structures that follow on 1-byte boundaries (no padding between
// fields).
#pragma pack(1)
// RC4 instance state
typedef struct _RC4_INSTANCE_STATE {
  BYTE SBOX[256];  // S-box: 256-entry byte table filled by rc4_key()
  int i;           // RC4 register: first index into SBOX
  int j;           // RC4 register: second index into SBOX
} RC4_INSTANCE_STATE, *PRC4_INSTANCE_STATE;

typedef unsigned char u8;  // byte type used by the VMPC code
// VMPC cipher state
typedef struct _VMPC_STATE {
  u8 P[256];  // table set to 0..255 by VMPC_init() and then permuted by swaps
  u8 n;       // position counter: zeroed by VMPC_init(), advanced once per
              // processed byte by VMPC_crypt()
  u8 s;       // running index into P, updated on every mixing/output step
} VMPC_STATE, *PVMPC_STATE;
// Restore the default structure packing.
#pragma pack()

// RC4 Key Scheduling Algorithm (KSA).
//
// Fills pRC4State->SBOX with the key-dependent permutation and resets the
// i/j registers to 0 so the state is ready for rc4().
//
//   pRC4State - state to initialize; every field is overwritten.
//   cbData    - key length in bytes. Keys shorter than 256 bytes are reused
//               cyclically; only the first 256 bytes of a longer key are used.
//   pbData    - key bytes; cbData bytes must be readable.
//
// If cbData is 0 or pbData is null there is no key material: SBOX is left as
// the identity permutation (a defined but keyless state). Callers should
// treat that as a usage error.
//
// Note: the swap is performed on every mixing step, as RC4 requires, so the
// resulting keystream is that of standard RC4.
void rc4_key(PRC4_INSTANCE_STATE pRC4State, DWORD cbData, BYTE* pbData) {
  int i;
  int j = 0;

  // Start from the identity permutation.
  for (i = 0; i < 256; i++) {
    pRC4State->SBOX[i] = (BYTE)i;
  }

  if (cbData != 0 && pbData != 0) {
    for (i = 0; i < 256; i++) {
      BYTE t;

      // Wrap the key index so short keys never read past pbData[cbData - 1].
      j = (j + pRC4State->SBOX[i] + pbData[(DWORD)i % cbData]) & 0x0FF;

      // Swap SBOX[i] and SBOX[j]; both indices are always within 0..255.
      t = pRC4State->SBOX[i];
      pRC4State->SBOX[i] = pRC4State->SBOX[j];
      pRC4State->SBOX[j] = t;
    }
  }

  // The generation phase starts with both registers at zero.
  pRC4State->i = 0;
  pRC4State->j = 0;
}

// Classic RC4 keystream generation.
//
// XORs cbData bytes of pbData in place with the keystream produced from
// pRC4State. Because the operation is a XOR, the same call encrypts and
// decrypts, provided the state was prepared with the same key.
//
//   pRC4State - state prepared by rc4_key(); SBOX, i and j are advanced, so
//               consecutive calls continue the same keystream.
//   cbData    - number of bytes to process; 0 leaves buffer and state as is.
//   pbData    - buffer transformed in place.
void rc4(PRC4_INSTANCE_STATE pRC4State, DWORD cbData, BYTE* pbData) {
  int i = pRC4State->i;
  int j = pRC4State->j;

  // The counter has the same unsigned type as cbData, so the comparison is
  // not mixed-sign and the counter cannot overflow for large lengths.
  for (DWORD p = 0; p < cbData; p++) {
    BYTE t;

    i = (i + 1) & 0x0FF;
    j = (j + pRC4State->SBOX[i]) & 0x0FF;

    // Swap SBOX[i] and SBOX[j].
    t = pRC4State->SBOX[i];
    pRC4State->SBOX[i] = pRC4State->SBOX[j];
    pRC4State->SBOX[j] = t;

    // Keystream byte is SBOX[(SBOX[i] + SBOX[j]) mod 256].
    pbData[p] ^=
        pRC4State->SBOX[(pRC4State->SBOX[i] + pRC4State->SBOX[j]) & 0xFF];
  }

  pRC4State->i = i;
  pRC4State->j = j;
}

// Initializes a VMPC state from a key and an initialization vector.
//
// P is set to 0..255 and s to 0, then the table is mixed in three passes of
// 768 steps each: first with key, then with iv, then with key again. Each
// pass reads its input cyclically with period 64, so both key and iv must
// have at least 64 readable bytes. n is zeroed at the end.
void VMPC_init(PVMPC_STATE pVMPC, u8* key, u8* iv) {
  u8* const passes[3] = {key, iv, key};
  int pass, step, slot;

  for (slot = 0; slot < 256; slot++) {
    pVMPC->P[slot] = (u8)slot;
  }
  pVMPC->s = 0;

  for (pass = 0; pass < 3; pass++) {
    const u8* material = passes[pass];

    for (step = 0; step < 768; step++) {
      const int pos = step & 0xff;
      u8 held;

      // Advance s through the table, then swap P[pos] with P[s].
      pVMPC->s =
          pVMPC->P[(pVMPC->s + pVMPC->P[pos] + material[step % 64]) & 0xff];
      held = pVMPC->P[pos];
      pVMPC->P[pos] = pVMPC->P[pVMPC->s];
      pVMPC->P[pVMPC->s] = held;
    }
  }

  pVMPC->n = 0;
}

// Encrypts or decrypts len bytes with the VMPC keystream.
//
// Each output byte is the input byte XORed with one keystream byte, so the
// same call performs both directions when started from the same state.
//
//   pVMPC  - state prepared by VMPC_init(); P, s and n are advanced, so
//            consecutive calls continue the same keystream.
//   input  - source bytes; len bytes must be readable.
//   output - destination; len bytes must be writable. It may be the same
//            buffer as input, since each byte is read before it is written.
//   len    - number of bytes to process; zero or negative does nothing.
void VMPC_crypt(PVMPC_STATE pVMPC, u8* input, u8* output, long len) {
  long i;

  // "<" rather than "!=" so a negative len ends the loop immediately instead
  // of walking far out of bounds.
  for (i = 0; i < len; i++) {
    u8 z, temp;

    pVMPC->s = pVMPC->P[(pVMPC->s + pVMPC->P[pVMPC->n]) & 0xff];

    // Keystream byte: P[(P[P[s]] + 1) mod 256].
    z = pVMPC->P[(pVMPC->P[pVMPC->P[pVMPC->s]] + 1) & 0xff];

    // Swap P[n] and P[s].
    temp = pVMPC->P[pVMPC->n];
    pVMPC->P[pVMPC->n] = pVMPC->P[pVMPC->s];
    pVMPC->P[pVMPC->s] = temp;

    pVMPC->n = (u8)((pVMPC->n + 1) & 0xff);
    output[i] = (u8)(input[i] ^ z);
  }
}

// Usage example

// Pointer to the "SystemFunction036" export of Advapi32.dll, resolved at run
// time in main() via GetProcAddress. main() uses it to fill a buffer of
// RandomBufferLength bytes with random data for the key.
// NOTE(review): Microsoft documents this export as RtlGenRandom with a
// BOOLEAN return type, not BOOL - confirm before relying on the full value.
BOOL(__stdcall *SystemFunction036)
(PVOID RandomBuffer, ULONG RandomBufferLength);

int main() {
  char text[] = "garik";
  RC4_INSTANCE_STATE rc4_st;
  BYTE key[256];

  // генерим рандомный ключ в 2048 бит  8)
  // [url]http://msdn.microsoft.com/en-us/library/aa387694(VS.85).aspx[/url] ==
  // RtlGenRandom
  SystemFunction036 = (BOOL(__stdcall *)(PVOID, ULONG))GetProcAddress(
      GetModuleHandle("Advapi32.dll"), "SystemFunction036");
  SystemFunction036(key, 256);

  // инициализируем RC4
  rc4_key(&rc4_st, 256, key);

  // шифруем
  rc4(&rc4_st, 5, (BYTE *)text);

  // SBOX то изменился поэтому инициализируем снова
  rc4_key(&rc4_st, 256, key);

  // расшифровываем
  rc4(&rc4_st, 5, (BYTE *)text);

  return 0;
}