Preface - Simple Dynamic Strings
antirez created this project to unify the SDS code shared by the Redis, Disque, and Hiredis projects.
https://github.com/antirez/sds For more background information, read its README.md.
The sds project is a pragmatic, real-world implementation of a C string data structure; the library itself is not thread-safe.
Let's work through the code by hand to get a thorough understanding of the library's intent (antirez's comments are excellent).
/* 1 MiB (1024*1024 bytes).
 * NOTE(review): not referenced anywhere in this excerpt; presumably the upper
 * bound on extra space preallocated when a string grows - confirm in sds.c. */
#define SDS_MAX_PREALLOC (1024*1024)
/* Layout of a type 5 SDS header.
 *
 * This struct exists for documentation only: it is never used, the code
 * accesses the flags byte directly instead. There are no separate length or
 * capacity fields; the length is packed into the flags byte. */
struct __attribute__((__packed__)) sdshdr5 {
    unsigned char flags; /* low 3 bits: type, high 5 bits: string length */
    char buf[];          /* string bytes follow the header */
};
/* SDS header with 8-bit length and capacity fields. Packed, so the header is
 * exactly 3 bytes and 'flags' is the byte immediately before 'buf'. */
struct __attribute__((__packed__)) sdshdr8 {
    uint8_t len;         /* bytes in use */
    uint8_t alloc;       /* capacity, excluding the header and null terminator */
    unsigned char flags; /* low 3 bits: type, remaining 5 bits unused */
    char buf[];          /* string bytes follow the header */
};
/* SDS header with 16-bit length and capacity fields. Packed, so the header is
 * exactly 5 bytes and 'flags' is the byte immediately before 'buf'. */
struct __attribute__((__packed__)) sdshdr16 {
    uint16_t len;        /* bytes in use */
    uint16_t alloc;      /* capacity, excluding the header and null terminator */
    unsigned char flags; /* low 3 bits: type, remaining 5 bits unused */
    char buf[];          /* string bytes follow the header */
};
/* SDS header with 32-bit length and capacity fields. Packed, so the header is
 * exactly 9 bytes and 'flags' is the byte immediately before 'buf'. */
struct __attribute__((__packed__)) sdshdr32 {
    uint32_t len;        /* bytes in use */
    uint32_t alloc;      /* capacity, excluding the header and null terminator */
    unsigned char flags; /* low 3 bits: type, remaining 5 bits unused */
    char buf[];          /* string bytes follow the header */
};
/* SDS header with 64-bit length and capacity fields. Packed, so the header is
 * exactly 17 bytes and 'flags' is the byte immediately before 'buf'. */
struct __attribute__((__packed__)) sdshdr64 {
    uint64_t len;        /* bytes in use */
    uint64_t alloc;      /* capacity, excluding the header and null terminator */
    unsigned char flags; /* low 3 bits: type, remaining 5 bits unused */
    char buf[];          /* string bytes follow the header */
};
/* Header type codes. The code is kept in the 3 least significant bits of the
 * flags byte and selects which sdshdr* layout precedes the string buffer. */
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
/* Mask that extracts the type code from a flags byte. */
#define SDS_TYPE_MASK 7
/* Number of low bits of the flags byte occupied by the type code. */
#define SDS_TYPE_BITS 3
/* Length of a type 5 string: the flags bits above the type code. */
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
/* Pointer to the sdshdr##T header located sizeof(struct sdshdr##T) bytes
 * before the string pointer 's'. */
#define SDS_HDR(T, s) ((struct sdshdr##T *)((s) - (sizeof(struct sdshdr##T))))
/* Declares a local variable named 'sh' pointing to the sdshdr##T header of 's'. */
#define SDS_HDR_VAR(T, s) struct sdshdr##T * sh = SDS_HDR(T, s)
Start by studying sdshdr5, sdshdr8, sdshdr16, sdshdr32, and sdshdr64 to get a feel for the author's intentions.
First, these basic structures share four fields: len, alloc, flags, and buf. Some readers will ask: what about sdshdr5?
sdshdr5 has no alloc or len field. It is a special case: both alloc and len are implied by flags,
as the function-like macro SDS_TYPE_5_LEN(f) shows. Consequently, for a string represented by sdshdr5,
the string length and the string capacity are always the same. Next, consider the intent of __attribute__ ((__packed__)): it tells the compiler
not to insert alignment padding into the structure, so the members are laid out according to the number of bytes they actually occupy. As for why the alignment padding is disabled, my understanding is that
there are two reasons: first, it saves memory; second, it makes the memory layout portable. It also means sizeof(struct sdshdrN) has no trailing padding, which the pointer arithmetic in SDS_HDR depends on.
Most of what follows is boilerplate code that is easy to understand - for example, the functions below, which satisfy the relationship sdsalloc() = sdsavail() + sdslen().
/* Return the number of bytes in use in the SDS string 's'.
 *
 * The flags byte stored immediately before the buffer (s[-1]) carries the
 * header type in its low bits. For type 5 the length is encoded in the flags
 * byte itself; for every other type it is read from the header's 'len' field.
 * Returns 0 when the type bits match no known header type.
 *
 * Defined 'static inline': in C99/C11 a plain 'inline' definition does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would fail at link time. */
static inline size_t sdslen(const sds s) {
    unsigned char flags = s[-1];
    switch (flags & SDS_TYPE_MASK) {
    case SDS_TYPE_5:
        return SDS_TYPE_5_LEN(flags);
    case SDS_TYPE_8:
        return SDS_HDR(8, s)->len;
    case SDS_TYPE_16:
        return SDS_HDR(16, s)->len;
    case SDS_TYPE_32:
        return SDS_HDR(32, s)->len;
    case SDS_TYPE_64:
        return SDS_HDR(64, s)->len;
    default:
        return 0;
    }
}
/* Return the number of unused bytes in the SDS string 's', computed as the
 * header's 'alloc' minus its 'len'.
 *
 * The header type is taken from the flags byte at s[-1]. Type 5 strings, and
 * any unrecognized type, report 0 available bytes.
 *
 * Defined 'static inline': in C99/C11 a plain 'inline' definition does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would fail at link time. */
static inline size_t sdsavail(const sds s) {
    unsigned char flags = s[-1];
    switch (flags & SDS_TYPE_MASK) {
    case SDS_TYPE_8: {
        SDS_HDR_VAR(8, s); /* declares 'sh' */
        return sh->alloc - sh->len;
    }
    case SDS_TYPE_16: {
        SDS_HDR_VAR(16, s);
        return sh->alloc - sh->len;
    }
    case SDS_TYPE_32: {
        SDS_HDR_VAR(32, s);
        return sh->alloc - sh->len;
    }
    case SDS_TYPE_64: {
        SDS_HDR_VAR(64, s);
        return sh->alloc - sh->len;
    }
    default:
        return 0;
    }
}
/* Return the capacity of the SDS string 's'.
 *
 * sdsalloc() = sdsavail() + sdslen()
 *
 * The header type is taken from the flags byte at s[-1]. For type 5 the
 * capacity equals the length encoded in the flags byte; for every other type
 * it is the header's 'alloc' field. Returns 0 when the type bits match no
 * known header type.
 *
 * Defined 'static inline': in C99/C11 a plain 'inline' definition does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would fail at link time. */
static inline size_t sdsalloc(const sds s) {
    unsigned char flags = s[-1];
    switch (flags & SDS_TYPE_MASK) {
    case SDS_TYPE_5:
        return SDS_TYPE_5_LEN(flags);
    case SDS_TYPE_8:
        return SDS_HDR(8, s)->alloc;
    case SDS_TYPE_16:
        return SDS_HDR(16, s)->alloc;
    case SDS_TYPE_32:
        return SDS_HDR(32, s)->alloc;
    case SDS_TYPE_64:
        return SDS_HDR(64, s)->alloc;
    default:
        return 0;
    }
}
Is it now immediately clear what sdsalloc(), sdsavail(), and sdslen() are all about?
Main Text - Explanation of Selected Code
1. Duplicated code can be factored out
/* Helper for sdscatlonglong() doing the actual number -> string
 * conversion. 's' must point to a string with room for at least
 * SDS_LLSTR_SIZE bytes.
 *
 * The function returns the length of the null-terminated string
 * representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char *s, long long value) {
    char *p, aux;
    unsigned long long v;
    size_t l;

    /* Take the magnitude in unsigned arithmetic. Negating the signed value
     * directly overflows for LLONG_MIN, which is undefined behavior; the
     * conversion to unsigned and the unsigned negation are both well defined
     * and yield the correct magnitude for every input. */
    v = (value < 0) ? -(unsigned long long)value : (unsigned long long)value;

    /* Generate the string representation, this method produces
     * a reversed string. */
    p = s;
    do {
        *p++ = '0' + (v % 10);
        v /= 10;
    } while (v);
    if (value < 0) *p++ = '-';

    /* Compute length and add null term. */
    l = p - s;
    *p = '\0';

    /* Reverse the string. */
    p--;
    while (s < p) {
        aux = *s;
        *s = *p;
        *p = aux;
        s++;
        p--;
    }
    return (int)l;
}
/* Unsigned counterpart of sdsll2str(): writes the decimal representation of
 * 'v' at 's', null-terminates it, and returns its length. */
int sdsull2str(char *s, unsigned long long v) {
    char *end = s;

    /* Emit the digits least significant first, so the text comes out
     * reversed. */
    do {
        *end++ = '0' + (v % 10);
        v /= 10;
    } while (v);

    /* Record the length and terminate the string. */
    size_t len = end - s;
    *end = '\0';

    /* Swap characters from both ends towards the middle to restore the
     * natural digit order. */
    for (char *lo = s, *hi = end - 1; lo < hi; lo++, hi--) {
        char tmp = *lo;
        *lo = *hi;
        *hi = tmp;
    }
    return (int)len;
}
These functions convert a long long or an unsigned long long to a char * string, and they are simple. The code at the end of both functions is identical,
so it can be refactored into a shared helper and reused.
/* Finish a number -> string conversion whose characters were produced in
 * reverse order.
 *
 * 's' is the start of the buffer and 'p' points one past the last character
 * written. The function stores the null terminator at '*p', reverses the
 * characters in [s, p) in place, and returns their count.
 *
 * Defined 'static inline': in C99/C11 a plain 'inline' definition does not
 * provide an external definition, so a call that is not inlined (e.g. in an
 * unoptimized build) would fail at link time. */
static inline int sdsreverse(char * s, char * p) {
    /* Compute length and add null term. */
    size_t l = (size_t)(p - s);
    *p = '\0';

    /* Nothing to swap for an empty range; this also avoids forming a pointer
     * before the start of the buffer when p == s. */
    if (l > 0) {
        p--;
        while (s < p) {
            char aux = *s;
            *s = *p;
            *p = aux;
            s++;
            p--;
        }
    }
    return (int)l;
}
/* Helper for sdscatlonglong() doing the actual number -> string
 * conversion. 's' must point to a string with room for at least
 * SDS_LLSTR_SIZE bytes.
 *
 * The function returns the length of the null-terminated string
 * representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char * s, long long value) {
    char * p;
    unsigned long long v;

    /* Take the magnitude in unsigned arithmetic. Negating the signed value
     * directly overflows for LLONG_MIN, which is undefined behavior; the
     * conversion to unsigned and the unsigned negation are both well defined
     * and yield the correct magnitude for every input. */
    v = (value < 0) ? -(unsigned long long)value : (unsigned long long)value;

    /* Generate the string representation, this method produces
     * a reversed string. */
    p = s;
    do {
        *p++ = '0' + (v % 10);
    } while ((v /= 10));
    if (value < 0) *p++ = '-';

    /* Terminate, restore the natural order, and report the length. */
    return sdsreverse(s, p);
}
/* Unsigned counterpart of sdsll2str(): writes the decimal representation of
 * 'v' at 's' and returns whatever sdsreverse() reports for the produced
 * characters. */
int sdsull2str(char * s, unsigned long long v) {
    char * end = s;

    /* Emit the digits least significant first; at least one digit is always
     * written, so 0 becomes "0". */
    for (;;) {
        *end++ = '0' + (v % 10);
        v /= 10;
        if (v == 0) break;
    }
    return sdsreverse(s, end);
}
Doesn't this bring out much more of that old-school feel?
2. Overly brute-force use of vsnprintf
/* Like sdscatprintf() but gets va_list instead of being variadic.
 *
 * Formats into a temporary buffer and appends the result to 's' with
 * sdscat(), whose return value is returned. Returns NULL if a temporary
 * buffer cannot be allocated. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
    char staticbuf[1024];
    char *buf = staticbuf;
    char *t;
    size_t buflen = strlen(fmt)*2;

    /* Start from the stack buffer for speed; use the heap only when the
     * initial size guess does not fit in it. */
    if (buflen <= sizeof(staticbuf)) {
        buflen = sizeof(staticbuf);
    } else {
        buf = s_malloc(buflen);
        if (buf == NULL) return NULL;
    }

    /* Format, doubling the buffer each time the output does not fit. */
    for (;;) {
        va_list cpy;

        /* Sentinel: if vsnprintf() overwrites the second-to-last byte, the
         * output reached the end of the buffer and is treated as not
         * fitting. */
        buf[buflen-2] = '\0';
        va_copy(cpy,ap);
        vsnprintf(buf, buflen, fmt, cpy);
        va_end(cpy);
        if (buf[buflen-2] == '\0') break;

        if (buf != staticbuf) s_free(buf);
        buflen *= 2;
        buf = s_malloc(buflen);
        if (buf == NULL) return NULL;
    }

    /* Append the formatted text to the SDS string and release the heap
     * buffer, if one was used. */
    t = sdscat(s, buf);
    if (buf != staticbuf) s_free(buf);
    return t;
}
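Growing the buffer by calling vsnprintf in a while loop is really too brute-force. It is better to read `man vsnprintf`: called with a NULL buffer and a size of 0, it returns the number of characters the output needs. You can see my submission here: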
https://github.com/antirez/sds/pull/115/commits/51e2cd78b1a102055979ec9eb83766b8d2cd6927
/* Like sdscatprintf() but gets va_list instead of being variadic.
 *
 * The required size is measured first with vsnprintf(NULL, 0, ...), then the
 * text is formatted into a buffer of exactly that size and appended to 's'
 * with sdscat(), whose return value is returned.
 *
 * Returns NULL if vsnprintf() reports an error or the temporary buffer cannot
 * be allocated. 'ap' itself is never consumed or ended here: each pass works
 * on its own copy, so the caller remains responsible for va_end(ap). */
sds sdscatvprintf(sds s, const char * fmt, va_list ap) {
    int size;
    size_t buflen;
    va_list cpy;
    char staticbuf[1024], * buf, * t;

    /* Determine required size */
    va_copy(cpy, ap);
    size = vsnprintf(NULL, 0, fmt, cpy);
    va_end(cpy);
    if (size < 0) return NULL;

    /* One extra byte for '\0'. Computed as size_t so the increment cannot
     * overflow and the comparison below is not mixed signed/unsigned. */
    buflen = (size_t)size + 1;

    /* We try to start using a static buffer for speed.
     * If not possible we revert to heap allocation. */
    if (buflen > sizeof(staticbuf)) {
        buf = s_malloc(buflen);
        if (buf == NULL) return NULL;
    } else {
        buf = staticbuf;
    }

    /* Every va_copy() must be matched by va_end() on the copy, not on the
     * caller's list. */
    va_copy(cpy, ap);
    size = vsnprintf(buf, buflen, fmt, cpy);
    va_end(cpy);
    if (size < 0) {
        if (buf != staticbuf) s_free(buf);
        return NULL;
    }

    /* Finally concat the obtained string to the SDS string and return it. */
    t = sdscat(s, buf);
    if (buf != staticbuf) s_free(buf);
    return t;
}
Don't ask, it's God.
3. sdssplitargs is hard to read
/* Helper function for sdssplitargs(): returns 1 if 'c' is one of
 * 0-9, a-f or A-F, and 0 otherwise. */
int is_hex_digit(char c) {
    if (c >= '0' && c <= '9') return 1;
    if (c >= 'a' && c <= 'f') return 1;
    if (c >= 'A' && c <= 'F') return 1;
    return 0;
}
/* Helper function for sdssplitargs() that maps a hex digit (either case) to
 * its value from 0 to 15. Any character that is not a hex digit maps to 0. */
int hex_digit_to_int(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return 0;
}
/* Split a line into arguments, where every argument can be in the
 * following programming-language REPL-alike form:
 *
 * foo bar "newline are supported\n" and "\xff\x00otherstuff"
 *
 * The number of arguments is stored into *argc, and an array
 * of sds is returned.
 *
 * The caller should free the resulting array of sds strings with
 * sdsfreesplitres().
 *
 * Note that sdscatrepr() is able to convert back a string into
 * a quoted string in the same format sdssplitargs() is able to parse.
 *
 * The function returns the allocated tokens on success, even when the
 * input string is empty, or NULL if the input contains unbalanced
 * quotes or closed quotes followed by non space characters
 * as in: "foo"bar or "foo'
 *
 * On failure *argc is reset to 0 and every token built so far is freed.
 *
 * Characters are cast to unsigned char before being handed to isspace():
 * passing a negative value (a byte >= 0x80 where plain char is signed) to a
 * <ctype.h> function is undefined behavior.
 */
sds *sdssplitargs(const char *line, int *argc) {
    const char *p = line;
    char *current = NULL;
    char **vector = NULL;

    *argc = 0;
    while(1) {
        /* skip blanks */
        while(*p && isspace((unsigned char)*p)) p++;
        if (*p) {
            /* get a token */
            int inq=0;  /* set to 1 if we are in "quotes" */
            int insq=0; /* set to 1 if we are in 'single quotes' */
            int done=0;

            if (current == NULL) current = sdsempty();
            while(!done) {
                if (inq) {
                    if (*p == '\\' && *(p+1) == 'x' &&
                        is_hex_digit(*(p+2)) &&
                        is_hex_digit(*(p+3)))
                    {
                        /* \xHH escape: append the byte it encodes. */
                        unsigned char byte;

                        byte = (hex_digit_to_int(*(p+2))*16)+
                                hex_digit_to_int(*(p+3));
                        current = sdscatlen(current,(char*)&byte,1);
                        /* Skip 'x' and both digits; the increment at the
                         * bottom of the loop consumes the last one. */
                        p += 3;
                    } else if (*p == '\\' && *(p+1)) {
                        /* Single-character escape; unknown escapes yield
                         * the escaped character itself. */
                        char c;

                        p++;
                        switch(*p) {
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        case 'b': c = '\b'; break;
                        case 'a': c = '\a'; break;
                        default: c = *p; break;
                        }
                        current = sdscatlen(current,&c,1);
                    } else if (*p == '"') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else if (insq) {
                    if (*p == '\\' && *(p+1) == '\'') {
                        /* Inside single quotes only \' is an escape. */
                        p++;
                        current = sdscatlen(current,"'",1);
                    } else if (*p == '\'') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else {
                    switch(*p) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                        /* Unquoted token ends at blank or end of input. */
                        done=1;
                        break;
                    case '"':
                        inq=1;
                        break;
                    case '\'':
                        insq=1;
                        break;
                    default:
                        current = sdscatlen(current,p,1);
                        break;
                    }
                }
                /* Advance unless we are already at the terminator. */
                if (*p) p++;
            }
            /* add the token to the vector */
            vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
            vector[*argc] = current;
            (*argc)++;
            current = NULL;
        } else {
            /* Even on empty input string return something not NULL. */
            if (vector == NULL) vector = s_malloc(sizeof(void*));
            return vector;
        }
    }

err:
    /* Release every completed token, the vector, and the token in progress. */
    while((*argc)--)
        sdsfree(vector[*argc]);
    s_free(vector);
    if (current) sdsfree(current);
    *argc = 0;
    return NULL;
}
/* Release a token array such as the one returned by sdssplitargs(): frees
 * each of the 'count' sds strings, last to first, and then the array itself.
 * Does nothing if 'tokens' is NULL. */
void sdsfreesplitres(sds *tokens, int count) {
    int remaining = count;

    if (tokens == NULL) return;
    while (remaining != 0) {
        remaining--;
        sdsfree(tokens[remaining]);
    }
    s_free(tokens);
}
The sdssplitargs function is hard to understand at first glance, so I've written a demo to share with you:
#include <stdio.h>
#include "sds.h"
/* Demo: split a fixed command line with sdssplitargs() and print the input,
 * the token count, and every token, then release the tokens. */
int main(int argc, char * argv[]) {
    const char * line = " hset name \"name:filed\" \"value:field\" ";
    int count;
    sds * tokens = sdssplitargs(line, &count);

    printf("line = [%s], count = [%d]\n", line, count);

    int i = 0;
    while (i < count) {
        printf("tokens[%d] = [%s]\n", i, tokens[i]);
        i++;
    }

    sdsfreesplitres(tokens, count);
    return 0;
}
Output
line = [ hset name "name:filed" "value:field" ], count = [4]
tokens[0] = [hset]
tokens[1] = [name]
tokens[2] = [name:filed]
tokens[3] = [value:field]
Instant enlightenment, isn't it? -> That is exactly what sdssplitargs does!
Having written the whole thing out by hand, my impression is that antirez's sds is very fluent code with nothing fancy in it.
If you are interested, you will get a lot out of writing it out yourself ~
Postscript - Keep it up
Mistakes are inevitable. Corrections and discussion are welcome ~
Be one's unshirkable responsibility - https://music.163.com/#/song?id=1363553512
Ballad of the Bright Moon (Li Bai, Tang dynasty): As a child I did not know the moon and called it a white jade plate. I also wondered whether it was a mirror from the Jade Terrace, flying at the edge of the blue clouds. The immortal dangles his two feet; how round and full the cassia tree is. The white rabbit has finished pounding the elixir; tell me, who is it for? The toad gnaws at the round shadow, and the great brightness wanes in the night. Long ago Yi shot down the nine sun-crows, and heaven and mankind were clear and at peace. Now the moon's essence is sunk in confusion; go, go, it is no longer worth beholding. What can be done when sorrow comes? Grief tears at the heart and liver.