0. Preface
The processing of characters and strings in C language is very frequent, but C language itself has no string type. Strings are usually placed in constant strings or character arrays.
String constants apply to string functions that do not modify them
1. Function introduction
strlen
#include<stdio.h>
#include<assert.h>

/*
 * Hand-written strlen: returns the number of characters that precede the
 * terminating '\0'.
 * str is const-qualified because the function must not modify the string.
 */
int my_strlen(const char* str)
{
    assert(str != NULL); // a null pointer here is a caller bug

    // The cursor has to be a const char* as well: assigning a const char* to
    // a plain char* would drop the qualifier and the compiler would warn.
    const char* cursor = str;
    for (; *cursor != '\0'; cursor++)
    {
        ; // just walk to the terminator
    }

    // pointer - pointer gives the number of elements between the two
    return cursor - str;
}

int main()
{
    char arr[] = "abcdef";
    int len = my_strlen(arr);
    printf("%d\n", len); // 6
    return 0;
}
The return value of the library function strlen is size_t, which is an unsigned integer type (for example, unsigned int on 32-bit builds).
size_t also has its disadvantages
#include<stdio.h> #include<string.h> int main() { if (strlen("abc") - strlen("abcdef") < 0) { printf("1\n"); } else { printf("2\n");//2, size will be output_ T always returns a positive number } return 0; } //Solution: 1.hold strlen Return value force type conversion to int 2.Or use it first strlen Calculate the result, and then calculate the result
Note: the string pointed to by the parameter must end with '\0'.
String function with unlimited length
strcpy
char* strcpy(char * destination, const char * source ); Copies the C string pointed by source into the array pointed by destination, including the terminating null character (and stopping at that point). Return Value Each of these functions returns the destination string. No return value is reserved to indicate an error.
-
Copy string
-
The source string must end with '\0'.
-
The '\0' in the source string is also copied to the destination space.
-
The destination space must be large enough to hold the source string.
-
The target space must be modifiable. Cannot be a constant string
When printing with %s, printf stops as soon as it reaches '\0' and does not print the '\0' itself.
Simulation Implementation
#include <stdio.h>
#include<assert.h>

/*
 * Hand-written strcpy: copies the string at src, including its terminating
 * '\0', into dest and returns the original dest pointer.
 * src is const because the source string must not be modified.
 */
char* my_strcpy(char* dest, const char* src)
{
    assert(dest && src);

    char* out = dest;
    char copied;
    do
    {
        // copy one character, then advance both pointers
        copied = *src++;
        *out++ = copied;
    } while (copied != '\0'); // the '\0' is copied too, and then ends the loop

    return dest;
}

int main()
{
    char arr1[20] = { "xxxxxxxxxxxxx" };
    char arr2[] = { "hello" };
    printf("%s\n", my_strcpy(arr1, arr2)); // chained access: prints hello
    return 0;
}
About pointers:
- If you do not yet know what a pointer should point to, initialize it to NULL
- After you have finished using a pointer, set it to NULL
strcat
char*strcat ( char* destination, const char*source ); Appends a copy of the source string to the destination string. The terminating null character in destination is overwritten by the first character of source, and a null character is included at the end of the new string formed by the concatenation of both in destination. Return Value Each of these functions returns the destination string (strDestination). No return value is reserved to indicate an error.
- The source string must end with '\ 0'.
- The target space must be large enough to accommodate the contents of the source string.
- The target space must be modifiable.
Simulated implementation of strcat
#include<stdio.h>
#include<string.h>
#include<assert.h>

/*
 * Hand-written strcat: appends the string at src to the end of the string at
 * dest (overwriting dest's '\0') and returns the original dest pointer.
 */
char* my_strcat(char* dest, const char* src)
{
    assert(dest && src); // neither pointer may be null

    // 1. Find the '\0' that ends the destination string.
    char* tail = dest;
    while (*tail != '\0')
    {
        tail++;
    }

    // 2. Copy the source there, terminator included.
    for (;;)
    {
        char c = *src++;
        *tail++ = c;
        if (c == '\0')
        {
            break;
        }
    }

    return dest;
}

int main()
{
    char arr1[30] = "hello";
    char arr2[] = " world";
    printf("%s\n", my_strcat(arr1, arr2)); // hello world
    return 0;
}
Library function source code:
/*
 * Library implementation of strcat, quoted for reference.
 * Appends src to the end of dst and returns dst.
 */
char * __cdecl strcat (char * dst, const char * src)
{
    char * cp = dst;

    while( *cp )
        cp++;                           /* find end of dst */

    while((*cp++ = *src++) != '\0') ;   /* Copy src to end of dst */

    return( dst );                      /* return dst */
}
// __cdecl is a calling-convention specifier; it can be ignored when reading the logic
strcmp
int strcmp( const char *string1, const char *string2 ); Return Value < 0 string1 less than string2 0 string1 identical to string2 > 0 string1 greater than string2
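If you compare strings directly with ==
if("abc" == "abq"); this compares the addresses of the two string literals (the address of the 'a' in one and the address of the 'a' in the other), not their contents, and those two addresses are not equal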
#include<stdio.h> #include<string.h> int main() { char arr1[] = "abc"; char arr2[] = "abd"; int ret = strcmp(arr1, arr2); printf("%d\n", ret);//-1 return 0; }
Simulation Implementation
#include<stdio.h>
#include<string.h>
#include<assert.h>

/*
 * Hand-written strcmp.
 *
 * Returns 0 when the two strings are identical, 1 when str1 is greater than
 * str2, and -1 when str1 is less than str2.
 *
 * The first differing characters are compared as unsigned char, which is how
 * the library strcmp orders strings. Comparing plain char would give a
 * different answer for bytes >= 0x80 on platforms where char is signed.
 */
int my_strcmp(const char* str1, const char* str2)
{
    assert(str1 && str2);

    // Skip over the common prefix; reaching '\0' in both means "equal".
    while (*str1 == *str2)
    {
        if (*str1 == '\0')
        {
            return 0;
        }
        str1++;
        str2++;
    }

    // What is compared here is the character code of the first mismatch.
    if ((unsigned char)*str1 > (unsigned char)*str2)
    {
        return 1;
    }
    else
    {
        return -1;
    }
}

int main()
{
    char arr1[] = "abc";
    char arr2[] = "abd";
    int ret1 = strcmp(arr1, arr2);
    int ret2 = my_strcmp(arr1, arr2);
    printf("%d\n", ret1); // a negative value (-1 in the author's run)
    printf("%d\n", ret2); // -1
    return 0;
}
Improve:
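if (*str1 > *str2) { return 1; } else { return -1; } can be simplified to return *str1 - *str2; because strcmp only promises a negative, zero, or positive result, not exactly -1/0/1. (Cast both operands to unsigned char to get the same ordering as the library function.)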
Library function source code:
/*
 * Library implementation of strcmp, quoted for reference.
 * Returns -1, 0 or 1 depending on how src compares with dst.
 */
int __cdecl strcmp( const char* src, const char* dst )
{
    int ret = 0;
    // Characters are compared as unsigned char. The loop continues while the
    // current pair is equal (difference 0) and the end of dst is not reached.
    while ((ret = *(unsigned char*)src - *(unsigned char*)dst) == 0 && *dst)
    {
        ++src, ++dst;
    }
    // Collapse the difference to its sign: ret > 0 gives 1 - 0, ret < 0 gives
    // 0 - 1, ret == 0 gives 0 - 0.
    return ((-ret) < 0) - (ret < 0); // (if positive) - (if negative) generates branchless code
}
String function with limited length
strncpy
char *strncpy( char *strDest, const char *strSource, size_t count ); count Number of characters to be copied Return Value Each of these functions returns strDest. No return value is reserved to indicate an error.
If the source string is shorter than count, the remaining characters up to count are filled with '\0'
// strncpy when the source string is longer than count.
int main()
{
    char arr1[] = "xxxxxxxxxx";
    char arr2[] = "hello world";
    // Exactly 5 characters are copied and no '\0' is written, so the rest of
    // arr1 keeps its old contents.
    strncpy(arr1, arr2, 5);
    printf("%s\n", arr1); // helloxxxxx
    return 0;
}
// strncpy when the source string is shorter than count.
int main()
{
    char arr1[] = "xxxxxxxxxx";
    char arr2[] = "he";
    // Only 2 characters are available, so the remaining 3 of the 5 requested
    // positions are filled with '\0'.
    strncpy(arr1, arr2, 5);
    printf("%s\n", arr1); // he
    return 0;
}
strncat
char *strncat( char *strDest, const char *strSource, size_t count );
After appending, strncat always writes a terminating '\0' at the end
// strncat: append 5 characters of arr2 to the end of arr1.
int main()
{
    char arr1[20] = "helloxxxxxxx";
    char arr2[] = "hello";
    strncat(arr1, arr2, 5); // appended after the last 'x', followed by '\0'
    printf("%s\n", arr1); // helloxxxxxxxhello
    return 0;
}
Appending starts at the first '\0' of the destination, and that '\0' is overwritten
// strncat starts appending at the first '\0' found in the destination.
int main()
{
    char arr1[20] = "hello\0xxxxxx"; // the string ends at the embedded '\0'
    char arr2[] = "hello";
    strncat(arr1, arr2, 5); // overwrites the '\0' and the x's that follow it
    printf("%s\n", arr1); // hellohello
    return 0;
}
If the number of characters to append (count) is greater than the length of the source string
// strncat when count is larger than the length of the source string.
int main()
{
    char arr1[20] = "hello\0xxxxxx";
    char arr2[] = "hello";
    strncat(arr1, arr2, 7); // count (7) exceeds the 5 characters in arr2
    printf("%s\n", arr1); // hellohello
    return 0;
}
// Appending stops at the source's '\0': once the source string has been
// appended, nothing more is added even though count is larger.
strncmp
int strncmp( const char *string1, const char *string2, size_t count );
// strncmp: compare only the first 4 characters of the two strings.
int main()
{
    char arr1[] = "abcdef";
    char arr2[] = "abcqqqq";
    // The 4th characters differ: 'd' is less than 'q', so the result is negative.
    int ret = strncmp(arr1, arr2, 4);
    printf("%d\n", ret); // a negative value (-1 in the author's run)
    return 0;
}
strstr
String lookup function
char *strstr( const char *string, const char *strCharSet ); Find a substring. Return Value Each of these functions returns a pointer to the first occurrence of strCharSet in string, or NULL if strCharSet does not appear in string. If strCharSet points to a string of zero length, the function returns string.//Returns the first found address
#include<stdio.h> #include<string.h> int main() { char arr1[] = "abcdefabcdef"; char arr2[] = "bcd"; char* ret = strstr(arr1, arr2); if (NULL == ret) { printf("can't find\n"); } else { printf("%s\n", ret);//bcdefabcdef } return 0; }
Simulation implementation str
abbbcdefbbcdef
bbc
When the first match fails, you have to go back to the second b of str and start again. However, if you directly move str and substr, you can't find the starting position, so use s1 and s2 instead
Every time the search fails, you need to move one more step at the beginning of the next time, so you need to have a cur++
Just draw the picture below
/*
 * Hand-written strstr (brute force).
 * Returns a pointer to the first occurrence of substr inside str, str itself
 * when substr is empty, or NULL when substr does not occur.
 */
char* my_strstr(const char* str, const char* substr)
{
    assert(str && substr);

    if (*substr == '\0')
    {
        // str is a const pointer; the cast is needed to return it as char*
        // without a warning.
        return (char*)str;
    }

    // Try every position of str as the possible start of a match.
    for (const char* start = str; *start != '\0'; start++)
    {
        const char* hay = start;    // walks the main string from this start
        const char* pat = substr;   // walks the pattern from its beginning

        // A '\0' in hay can never equal a non-'\0' pattern character, so this
        // condition also stops at the end of the main string.
        while (*pat != '\0' && *hay == *pat)
        {
            hay++;
            pat++;
        }

        if (*pat == '\0')
        {
            return (char*)start; // the whole pattern matched
        }
    }

    return NULL; // no starting position produced a match
}

int main()
{
    char arr1[] = "abbbcdefbbcdef";
    char arr2[] = "bbc";
    char* ret = my_strstr(arr1, arr2);
    if (NULL == ret)
    {
        printf("can't find\n");
    }
    else
    {
        printf("%s\n", ret); // bbcdefbbcdef
    }
    return 0;
}
Disadvantages: the efficiency of the algorithm is low, which can be realized by KMP algorithm
Library function source code
#include<string.h>

/*
 * A library-style implementation of strstr, quoted for reference.
 * Returns a pointer to the first occurrence of s2 in s1, or NULL.
 */
char* strstr(register const char* s1, register const char* s2)
{
    // The loop ends when strchr returned NULL or the end of s1 was reached.
    while(s1 && *s1)
    {
        // Does s2 match at the current position?
        if(strncmp(s1, s2, strlen(s2)) == 0)
            return ( char*)s1;
        // Jump to the next place where the first character of s2 occurs.
        s1 = strchr(s1+1, *s2);
    }
    // NOTE(review): when s1 and s2 are both empty the loop body never runs and
    // NULL is returned, although strstr is specified to return s1 for an empty
    // search string - confirm against the real library source.
    return NULL;
}
KMP algorithm
The core of an improved string matching algorithm is to use the information after matching failure to minimize the matching times between pattern string and main string, so as to achieve the purpose of fast matching
1. Why does the main string i not go back
2.j's fallback position
Purpose: i do not retreat, j retreat to a specific position
Question: suppose there is such a string, how can we determine the position of this j fallback
Because i does not fallback, try to find a part of the string matching the substring in the main string
So for the first time, j goes back to the position of 2
next array: save the fallback position of a substring after a position matching fails
Find next array:
If two equal true substrings cannot be found, then 0 is placed in next
If found, the length of the two strings will be put in the next array
a b a b c a b c d a b c d e
-1 0 0 1 2 0 1 2 0 0 1 2 0 0
Add up to 1 at a time
Assuming next[i] = k, deduce the formula:
And the length is equal, that is, k-1-0 = i-1-x
Then x = i-k
If p[i] == p[k], the middle is the p array, the 8 subscript is a, and the 3 subscript is also a
Add p[k] to the left and p[i] to the right
Then, on the basis of next[i] = k and p[i] == p[k], it is deduced that next[i+1] = k+1
What if p[i] != p[k]?
Then go back until you find p[i] == p[k], and you can use the formula next[i+1] = k+1
next[6] = 1
KMP implementation
#include<stdio.h>
#include<assert.h>
#include<string.h>
#include<stdlib.h>

/*
 * Fills next[0..lenSub-1] for the pattern sub.
 * next[i] is the pattern index to fall back to when a mismatch happens at
 * index i; next[0] is -1 and (when it exists) next[1] is 0.
 * The caller must pass lenSub >= 1 and a next array of at least lenSub ints.
 */
void GetNext(const char* sub, int* next, int lenSub)
{
    next[0] = -1;
    if (lenSub == 1) // only one element: only next[0] can be assigned
    {
        return;
    }
    next[1] = 0;

    int i = 2; // entry of next currently being computed
    int k = 0; // fallback value of the previous entry, i.e. next[i - 1]

    // Note the difference between computing next by hand and in code: next[i]
    // is not known yet, so the code works from position i - 1.
    //   by hand: p[i] == p[k]      ->  next[i + 1] = k + 1
    //   in code: p[i - 1] == p[k]  ->  next[i]     = k + 1
    while (i < lenSub)
    {
        if (k == -1 || sub[i - 1] == sub[k])
        {
            // k == -1 means no equal prefix/suffix exists, so next[i] = 0.
            next[i] = k + 1;
            i++;
            k++;
        }
        else
        {
            // Not equal: fall back along next and test p[i - 1] == p[k] again.
            k = next[k];
        }
    }
}

/*
 * KMP substring search.
 * Searches str for substr, starting at index pos.
 * Returns the index of the first match, or -1 when there is no match, when
 * either string is empty, when pos is out of range, or when the next array
 * cannot be allocated.
 */
int KMP(const char* str, const char* substr, int pos)
{
    assert(str && substr);

    int lenStr = (int)strlen(str);
    int lenSub = (int)strlen(substr);
    if (lenStr == 0 || lenSub == 0)
    {
        return -1;
    }
    if (pos < 0 || pos >= lenStr)
    {
        return -1;
    }

    // One next entry per pattern character. The result is checked explicitly:
    // an assert would disappear when NDEBUG is defined.
    int* next = malloc(sizeof *next * (size_t)lenSub);
    if (next == NULL)
    {
        return -1;
    }
    GetNext(substr, next, lenSub);

    int i = pos; // index into the main string, never moves backwards
    int j = 0;   // index into the pattern
    while (i < lenStr && j < lenSub)
    {
        // j == -1 means even the first pattern character failed to match:
        // j++ brings it back to 0 and i++ moves on to the next character.
        // Testing j == -1 first also avoids indexing substr[-1].
        if (j == -1 || str[i] == substr[j])
        {
            i++;
            j++;
        }
        else
        {
            j = next[j]; // mismatch: fall back to the index stored in next
        }
    }

    free(next);

    if (j >= lenSub)
    {
        return i - j; // whole pattern matched; i - j is where the match starts
    }
    return -1; // reached the end of the main string without a match
}

int main()
{
    printf("%d\n", KMP("ababcabcdabcde", "abcd", 0));  // 5
    printf("%d\n", KMP("ababcabcdabcde", "abcdf", 0)); // -1 not found
    printf("%d\n", KMP("ababcabcdabcde", "ab", 0));    // 0 from the beginning
    return 0;
}
Code count next array
k back to - 1
next array optimization
0 1 2 3 4 5 6 7 8 a a a a a a a a b -1 0 1 2 3 4 5 6 7
If the subscript matching fails at 5, it will return to position 4, return to position 3, and return to position 0 all the way
Why not go back to position 0 in one step? The front characters are the same. The fifth character does not match, and the front must not match either
nextval array:
0 1 2 3 4 5 6 7 8 a a a a a a a a b -1 0 1 2 3 4 5 6 7 --next value -1 -1 -1 -1 -1 -1 -1 -1 7 --nextval value
1. The position of fallback to is the same as the current character. Write the next value of fallback to that position
2. If the fallback position is different from the current character, write the original next value of the current character
The first next value of the option answer starts from 0, so + 1 is required
strtok
char *strtok( char *strToken, const char *strDelimit );
- SEPs are character sets defined as parameters
- The first parameter specifies a string that contains 0 or more tags separated by one or more separators in the sep string.
- The strtok function finds the next tag in str, ends it with \ 0, and returns a pointer to this tag. (Note:
- The strtok function will change the string to be manipulated, so the string segmented by the strtok function is generally a temporary copy of the content and can be modified.)
- The first parameter of the strtok function is not NULL. The function will find the first tag in str, and the strtok function will save its position in the string// It is speculated that static variables should be used
- The first parameter of strtok function is NULL. The function will start at the position saved in the same string to find the next tag.
- A NULL pointer is returned if there are no more tags in the string.
- When strtok is looking for the first token, the first parameter of the function is not NULL (it is the string to split)
- When strtok is looking for any token after the first, the first parameter of the function is NULL
Very bad usage
#include<stdio.h> #include<string.h> int main() { const char* p = "@.";//This does not need to be distinguished in order char arr[] = "yzq2076188013@qq.com"; char buff[50] = { 0 }; strcpy(buff, arr); char* str = strtok(buff, p);//yzq2076188013 printf("%s\n", str); str = strtok(NULL, p);//qq printf("%s\n", str); str = strtok(NULL, p);// com printf("%s\n", str); return 0; }
Elegant use
//Clever use of for loop int main() { const char* p = "@.";//This does not need to be distinguished in order char arr[] = "yzq2076188013@qq.com"; char buff[50] = { 0 }; strcpy(buff, arr); char* str = NULL; for (str = strtok(buff, p); str != NULL; str = strtok(NULL, p)) { printf("%s\n",str); } return 0; }
strerror
Error code
c Some information is specified in the language Error code - error message 0 - "No Error" 1 - 2 - 3 - strerror Error codes can be translated into error messages
#include<stdio.h>
#include<string.h>

// Prints the error message that strerror gives for error codes 0 to 9.
int main()
{
    // strerror takes an int error code, so the loop index is an int.
    for (int i = 0; i < 10; i++)
    {
        printf("%s\n", strerror(i));
    }
    return 0;
}
/* Output of the author's run (the message text depends on the implementation):
No error
Operation not permitted
No such file or directory
No such process
Interrupted function call
Input/output error
No such device or address
Arg list too long
Exec format error
Bad file descriptor
*/
fopen
FILE *fopen( const char *filename, const char *mode ); Each of these functions returns a pointer to the open file. A null pointer value indicates an error.
c language can operate files
Open file – fopen
When the library function is used, if an error occurs, the global error variable errno will be set as the error code generated by this execution of the library function
errno is a global variable provided by the C library; it can be used directly and is declared in the errno.h header file
Character classification function
function | If his parameters meet the following conditions, it returns true |
---|---|
iscntrl | Any control character |
isspace | Whitespace characters: space ' ', form feed '\f', line feed '\n', carriage return '\r', tab '\t' or vertical tab '\v' |
isdigit | Decimal digits 0 ~ 9 |
isxdigit | Hexadecimal digits, including all decimal digits, lowercase letters a - f and uppercase letters A - F |
islower | Small letter a~z |
isupper | Capital letters A~Z |
isalpha | Letters a - z or A - Z |
isalnum | Letters or numbers, a - z,A - Z,0 - 9 |
ispunct | Punctuation mark, any graphic character not belonging to numbers or letters (printable) |
isgraph | Any graphic character |
isprint | Any printable character, including graphic characters and white space characters |
#include<stdio.h>
#include<ctype.h>

// Shows the return value of isspace for a non-whitespace character.
int main()
{
    char ch = 'w';
    // The <ctype.h> functions expect a value representable as unsigned char
    // (or EOF); a plain char is converted first so that a negative char value
    // is never passed.
    if (isspace((unsigned char)ch))
    {
        // whitespace characters give a non-zero result
        printf("%d\n", isspace((unsigned char)ch));
    }
    else
    {
        // non-whitespace characters give 0
        printf("%d\n", isspace((unsigned char)ch)); // 0
    }
    return 0;
}
#include<stdio.h>
#include<ctype.h>

// Two ways to test whether a character is a decimal digit.
int main()
{
    char ch = '0';

    // Hand-written range check.
    if(ch >= '0' && ch <= '9')
    {
        //...
    }

    // Library check: using isdigit everywhere keeps the code uniform.
    // The char is converted to unsigned char first, because the <ctype.h>
    // functions expect a value representable as unsigned char (or EOF).
    if (isdigit((unsigned char)ch))
    {
        //...
    }
    return 0;
}
Character conversion
<stdlib.h> and <ctype.h> int tolower ( int c ); int toupper ( int c );
#include<stdio.h>
#include<ctype.h>

// Reads one character and prints it with its letter case swapped.
int main()
{
    // getchar returns an int so that EOF can be told apart from every real
    // character; storing the result in a char would lose that distinction.
    int ch = getchar();
    if (ch == EOF)
    {
        return 0; // no input: nothing to convert
    }

    if (islower(ch))
    {
        ch = toupper(ch);
    }
    else
    {
        ch = tolower(ch); // characters that are not uppercase letters are unchanged
    }
    printf("%c\n", ch);
    return 0;
}
2. Memory function
memcpy
Memory copy function void *memcpy( void *dest, const void *src, size_t count ); void*Can receive any type of data memcpy returns the value of dest count Number of bytes to copy
#include<stdio.h>
#include<memory.h>
#include<string.h>

// Contrasts strcpy (strings only) with memcpy (any kind of data).
int main()
{
    char arr1[] = "abcdef";
    // The destination must hold the whole string plus its '\0' (7 bytes);
    // a one-element array here would make strcpy write past its end.
    char arr2[sizeof(arr1)] = { 0 };
    strcpy(arr2, arr1); // copy a string

    int arr3[] = { 1,2,3,4,5,6,7,8,9,10 };
    int arr4[5] = { 0 };
    // memcpy counts bytes, not elements: copy exactly as many bytes as arr4
    // holds (5 ints) instead of hard-coding 20.
    memcpy(arr4, arr3, sizeof(arr4));
    for (size_t i = 0; i < 5; i++)
    {
        printf("%d ", arr4[i]); // 1 2 3 4 5
    }
    return 0;
}
Simulation Implementation
#include<stdio.h>
#include<memory.h>
#include<string.h>
#include<assert.h>

/*
 * Hand-written memcpy: copies num bytes from src to dest, one byte at a time
 * from the lowest address upwards, and returns dest.
 */
void* my_memcpy(void* dest, const void* src, size_t num)
{
    assert(dest && src);

    // void* can be neither dereferenced nor incremented, so the copy works
    // through byte-sized views of the two buffers.
    char* to = (char*)dest;
    const char* from = (const char*)src;
    for (size_t i = 0; i < num; i++)
    {
        to[i] = from[i];
    }
    return dest;
}

int main()
{
    int arr3[] = { 1,2,3,4,5,6,7,8,9,10 };
    int arr4[5] = { 0 };
    // copy the last five ints of arr3 into arr4
    my_memcpy(arr4, arr3+5, 5*sizeof(arr3[0]));
    for (size_t i = 0; i < 5; i++)
    {
        printf("%d ", arr4[i]); // 6 7 8 9 10
    }
    return 0;
}
Defects:
#include<stdio.h>
#include<memory.h>
#include<string.h>
#include<assert.h>

/*
 * Hand-written memcpy: copies num bytes from src to dest, strictly byte by
 * byte from front to back, and returns dest.
 */
void* my_memcpy(void* dest, const void* src, size_t num)
{
    assert(dest && src);

    // void* cannot be dereferenced or incremented; use byte pointers instead.
    char* to = (char*)dest;
    const char* from = (const char*)src;
    while (num > 0)
    {
        *to++ = *from++;
        num--;
    }
    return dest;
}

// Shows what goes wrong when source and destination overlap.
void test1()
{
    // Try to copy 1 2 3 4 5 from arr3 onto the slots holding 3 4 5 6 7.
    int arr3[] = { 1,2,3,4,5,6,7,8,9,10 };
    int arr4[5] = { 0 };
    my_memcpy(arr3+2, arr3, 5 * sizeof(arr3[0]));
    for (size_t i = 0; i < 10; i++)
    {
        printf("%d ", arr3[i]); // the result turns out to be 1 2 1 2 1 2 1 8 9 10
    }
}

int main()
{
    test1();
    return 0;
}
// Because the copy runs byte by byte from front to back, 1 and 2 are written
// into the slots of 3 and 4 first. When the copy then reads from the slot
// that used to hold 3, what it takes out is already 1.
Fix: use the memmove function instead; it copies memory correctly even when the source and destination regions overlap
memmove
void *memmove( void *dest, const void *src, size_t count );
// Same overlapping copy as before, done with the library memmove.
void test1()
{
    // Copy 1 2 3 4 5 from arr3 onto the slots holding 3 4 5 6 7.
    int arr3[] = { 1,2,3,4,5,6,7,8,9,10 };
    int arr4[5] = { 0 };
    // my_memcpy(arr3 + 2, arr3, 5 * sizeof(arr3[0]));
    memmove(arr3+2, arr3, 5 * sizeof(arr3[0])); // handles the overlap correctly
    for (size_t i = 0; i < 10; i++)
    {
        printf("%d ", arr3[i]); // 1 2 1 2 3 4 5 8 9 10
    }
}

int main()
{
    test1();
    return 0;
}
In fact, C language only requires
memcpy can copy non overlapping memory space
memmove handles those overlapping memory copies
memmove includes the function of memcpy
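However, the memcpy in VS (Visual Studio) also happens to handle overlapping memory copies; the C standard does not guarantee this, so portable code should not rely on it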
// Same overlapping copy again, this time with the library memcpy.
// WARNING: memcpy is only specified for non-overlapping regions. The output
// noted below is what the author's library produced; other implementations
// may give a different result.
void test1()
{
    // Copy 1 2 3 4 5 from arr3 onto the slots holding 3 4 5 6 7.
    int arr3[] = { 1,2,3,4,5,6,7,8,9,10 };
    int arr4[5] = { 0 };
    // my_memcpy(arr3 + 2, arr3, 5 * sizeof(arr3[0]));
    // memmove(arr3 + 2, arr3, 5 * sizeof(arr3[0]));
    memcpy(arr3+2, arr3, 5 * sizeof(arr3[0]));
    for (size_t i = 0; i < 10; i++)
    {
        printf("%d ", arr3[i]); // 1 2 1 2 3 4 5 8 9 10 (in the author's run)
    }
}

int main()
{
    test1();
    return 0;
}
Simulation Implementation
#include<stdio.h>
#include<memory.h>
#include<string.h>
#include<assert.h>

/*
 * Hand-written memmove: copies num bytes from src to dest and returns dest.
 * The copy direction is chosen so that overlapping regions are handled:
 * front to back when dest lies before src, back to front otherwise.
 */
void* my_memmove(void* dest, const void* src, size_t num)
{
    assert(dest && src);

    char* to = (char*)dest;
    const char* from = (const char*)src;

    if (dest < src)
    {
        // Copy from front to back.
        for (size_t i = 0; i < num; i++)
        {
            to[i] = from[i];
        }
    }
    else
    {
        // Copy from back to front: the last byte (index num - 1) goes first.
        for (size_t left = num; left > 0; left--)
        {
            to[left - 1] = from[left - 1];
        }
    }
    return dest;
}

void test1()
{
    // Copy 1 2 3 4 5 from arr3 onto the slots holding 3 4 5 6 7.
    int arr3[] = { 1,2,3,4,5,6,7,8,9,10 };
    int arr4[5] = { 0 };
    my_memmove(arr3+2, arr3, 5 * sizeof(arr3[0]));
    for (size_t i = 0; i < 10; i++)
    {
        printf("%d ", arr3[i]); // 1 2 1 2 3 4 5 8 9 10
    }
}

int main()
{
    test1();
    return 0;
}
memset
void *memset( void *dest, int c, size_t count ); Sets the memory function in bytes
#include<stdio.h> #include<memory.h> int main() { char arr[20] = { 0 }; memset(arr, 'x', 10);//The first 10 characters are changed to x for (size_t i = 0; i < 20; i++) { printf("%c ", arr[i]);//x x x x x x x x x x } return 0; }
//It's also OK for integer arrays. Integers are 4 bytes. memset is modified byte by byte. You need to pay attention to the size end #include<stdio.h> #include<memory.h> int main() { int arr[10] = { 1,2,3,4,5,6,7,8,9,10 }; memset(arr, 0, 10); for (size_t i = 0; i < 10; i++) { printf("%d ", arr[i]);//0 0 0 4 5 6 7 8 9 10 } //01 00 00 00 02 00 00 00 03 00 00 00 ... Small end storage //00 00 00 00 00 00 00 00 00 00 00 00 ... return 0; }
memcmp
int memcmp( const void *buf1, const void *buf2, size_t count ); Return Value : Relationship of First count Bytes of buf1 and buf2 < 0 buf1 less than buf2 0 buf1 identical to buf2 > 0 buf1 greater than buf2
#include<stdio.h> #include<memory.h> int main() { int arr1[] = { 1,2,3,4,5 }; int arr2[] = { 1,2,3,4,5 }; int ret = memcmp(arr1, arr2, 8); printf("%d\n", ret);//0 The first 8 bytes are equal and return 0 return 0; }