빠른 base64 encoding/decoding 구현 by C++

게시자: aro2jongwonlee

이번에 필요에 의해 base64 encoding/decoding을 구현해야 할 일이 있었다.

사용하던 라이브러리(ACE/POCO)에 base64 기능이 있었지만, 라이브러리 전환도 고려해야 해서 직접 구현하기로 했다.

이런저런 구현 소스들을 찾아 테스트해 본 결과, 다음 코드가 가장 빨랐다.

// Base64 alphabet: maps a 6-bit value (0..63) to its output character.
// Kept as an explicit 64-element array (no trailing NUL), so
// sizeof(MimeBase64) is exactly 64.
static const char MimeBase64[] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',   //  0 ..  7
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',   //  8 .. 15
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',   // 16 .. 23
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',   // 24 .. 31
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',   // 32 .. 39
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',   // 40 .. 47
    'w', 'x', 'y', 'z', '0', '1', '2', '3',   // 48 .. 55
    '4', '5', '6', '7', '8', '9', '+', '/'    // 56 .. 63
};

// Reverse lookup for the Base64 alphabet: indexed by a byte value (0..255),
// yields the 6-bit value of that character, or -1 when the byte is not part
// of the alphabet (this includes the padding character '=').
// Index with an unsigned char value; a plain (possibly signed) char would
// produce a negative index for bytes >= 0x80.
// The table is read-only, hence const.
static const int DecodeMimeBase64[256] = {
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0x00 - 0x0F
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0x10 - 0x1F
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,   // 0x20 - 0x2F: '+' and '/'
    52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,   // 0x30 - 0x3F: '0'..'9'
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,   // 0x40 - 0x4F: 'A'..'O'
    15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,   // 0x50 - 0x5F: 'P'..'Z'
    -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,   // 0x60 - 0x6F: 'a'..'o'
    41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,   // 0x70 - 0x7F: 'p'..'z'
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0x80 - 0x8F
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0x90 - 0x9F
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0xA0 - 0xAF
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0xB0 - 0xBF
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0xC0 - 0xCF
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0xD0 - 0xDF
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,   // 0xE0 - 0xEF
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1    // 0xF0 - 0xFF
};

// Host byte-order detection for the BF layout below.
// Testing only whether LITTLE_ENDIAN is *defined* is unreliable: glibc's
// <endian.h> defines both LITTLE_ENDIAN and BIG_ENDIAN on every host, and a
// little-endian build that never includes that header has neither. Compare
// the actual byte-order macros instead and fall back to little-endian when
// nothing is known.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
    (!defined(__BYTE_ORDER__) && defined(BYTE_ORDER) && defined(BIG_ENDIAN) && \
     BYTE_ORDER == BIG_ENDIAN)
#define BF_HOST_BIG_ENDIAN 1
#else
#define BF_HOST_BIG_ENDIAN 0
#endif

// Overlays three bytes (c3 = first/most significant, c1 = last) with four
// 6-bit fields (e4 = most significant sextet, e1 = least significant), so a
// 24-bit Base64 group can be written as bytes and read back as sextets, or
// the other way round.
// NOTE: this relies on anonymous structs and on reading a union member other
// than the one last written, both of which are compiler extensions in C++,
// and on bit-fields being allocated from the least significant bit on
// little-endian targets (and from the most significant bit on big-endian
// ones) -- verify this for the compilers you target.
typedef union {
    struct {
#if !BF_HOST_BIG_ENDIAN
        unsigned char c1, c2, c3;
#else
        unsigned char c3, c2, c1;
#endif
    };
    struct {
#if !BF_HOST_BIG_ENDIAN
        unsigned int e1:6, e2:6, e3:6, e4:6;
#else
        unsigned int e4:6, e3:6, e2:6, e1:6;
#endif
    };
} BF;

// Encodes src_size bytes from src as padded Base64 text.
//
// src       input bytes; may contain arbitrary binary data. A null pointer
//           or a non-positive src_size is treated as empty input.
// src_size  number of bytes to encode.
// result    receives a buffer allocated with new char[] that holds the
//           NUL-terminated encoded text. The caller owns it and must release
//           it with delete[]. If result itself is null nothing is allocated
//           and 0 is returned.
//
// Returns the size of the allocated buffer, i.e. the encoded length plus one
// for the terminating NUL.
int HUMRedisObject::_base64enc(char *src, int src_size, char **result)
{
    if (!result) return 0;
    if (!src || src_size < 0) src_size = 0;

    // Four output characters per (possibly partial) group of three bytes,
    // plus the terminator.
    const int size = (4 * (src_size / 3)) + (src_size % 3 ? 4 : 0) + 1;
    char *out = new char[size];

    int j = 0;
    for (int i = 0; i < src_size; i += 3) {
        const int remaining = src_size - i;

        // Only read bytes that really belong to the input; missing bytes of
        // the final group count as zero so they cannot leak into the output.
        const unsigned int b0 = static_cast<unsigned char>(src[i]);
        const unsigned int b1 =
            remaining > 1 ? static_cast<unsigned char>(src[i + 1]) : 0u;
        const unsigned int b2 =
            remaining > 2 ? static_cast<unsigned char>(src[i + 2]) : 0u;

        // Combine with shifts so the result does not depend on host byte
        // order or bit-field layout.
        const unsigned int group = (b0 << 16) | (b1 << 8) | b2;

        out[j++] = MimeBase64[(group >> 18) & 0x3F];
        out[j++] = MimeBase64[(group >> 12) & 0x3F];
        // Positions that carry no input bits are emitted as '=' padding.
        out[j++] = remaining > 1 ? MimeBase64[(group >> 6) & 0x3F] : '=';
        out[j++] = remaining > 2 ? MimeBase64[group & 0x3F] : '=';
    }
    out[j] = '\0';

    *result = out;
    return size;
}

// Decodes NUL-terminated Base64 text.
//
// src     NUL-terminated Base64 input. Decoding stops at the terminator or
//         at the first '=' padding character. Characters that are not part
//         of the Base64 alphabet (for example line breaks) are skipped
//         instead of being decoded as garbage.
// result  caller-provided output buffer; it must have room for at least
//         3 bytes per 4 input characters. The output is binary and is NOT
//         NUL-terminated; bytes beyond *length are left untouched.
// length  receives the number of decoded bytes written to result.
//
// If any pointer is null nothing is decoded (and *length is set to 0 when
// length itself is valid).
void HUMRedisObject::_base64dec(char *src, char *result, int *length)
{
    if (!length) return;
    *length = 0;
    if (!src || !result) return;

    unsigned int acc = 0;   // collected sextets, newest in the low bits
    int pending = 0;        // number of bits in acc not yet written out
    int written = 0;

    for (const char *p = src; *p != '\0' && *p != '='; ++p) {
        // Index through unsigned char: a signed char >= 0x80 would otherwise
        // become a negative table index.
        const int sextet = DecodeMimeBase64[static_cast<unsigned char>(*p)];
        if (sextet < 0) continue;   // not a Base64 character

        acc = (acc << 6) | static_cast<unsigned int>(sextet);
        pending += 6;
        if (pending >= 8) {
            pending -= 8;
            result[written++] = static_cast<char>((acc >> pending) & 0xFFu);
        }
    }
    // Left-over bits (fewer than 8) belong to the padding and are dropped.
    *length = written;
}

원본은 다음과 같다.

빠른 base64 인코딩/디코딩 소스 – by ironiris (https://kldp.org/node/109436)

base64 인코딩/디코딩의 원리 – by I’m MK! (http://www.iamcorean.net/130)

원본과는 다르게 메모리 할당을 함수 내에서 수행하고 그 결과를 돌려주는 형태로 바꾸었다.

원래는 메모리를 할당한 구역 내에서 해제하는 것을 원칙으로 하고 있으나,

이 경우는 편의성을 생각해서 위와 같이 구현하였다. 따라서 _base64enc가 *result로 돌려준 버퍼는 호출한 쪽에서 delete[]로 해제해야 한다.

Advertisements