C language - notes on string functions and memory functions

Posted by mostwantedunm on Mon, 20 Sep 2021 02:01:15 +0200

These notes summarize the commonly used library functions for processing characters and strings, together with the usage, precautions, and simulated implementations of some of the string and memory functions.

strlen

This function receives a const char * parameter and returns the number of characters that precede the terminating '\0'. Note that the return type is size_t, which is an unsigned integer type.

//A common pitfall with the return value of strlen.
//strlen returns size_t, an unsigned type, so the subtraction below is carried out in
//unsigned arithmetic: 3 - 6 does not yield -3, it wraps around to a huge positive value.
//The condition is therefore true and "str2>str1" is printed even though str2 is shorter.
//To compare lengths correctly, compare the results directly: strlen(str2) > strlen(str1).
int main()
{
	const char* str1 = "abcdef";
	const char* str2 = "bbb";
	//Unsigned subtraction: the result can never be negative, so this is true whenever the lengths differ
	if (strlen(str2) - strlen(str1) > 0)
	{
		printf("str2>str1\n");
	}
	else
	{
		printf("srt1>str2\n");
	}
	return 0;
}

strlen Simulation Implementation

Method I
Use a counter

//my_strlen1 - length of a string, counted with an explicit counter.
//  str    : string terminated by '\0'; must not be NULL (checked with assert)
//  return : number of characters before the terminating '\0'
size_t my_strlen1(const char* str)
{
	assert(str);
	//The counter has the same type as the return value; an int counter could overflow
	//on very long strings and would be converted from signed to unsigned on return
	size_t count = 0;
	while (*str++)
	{
		count++;
	}
	return count;
}

Method II
Pointer subtraction

//my_strlen2 - length of a string, computed by pointer subtraction.
//  str    : string terminated by '\0'; must not be NULL (checked with assert)
//  return : number of characters before the terminating '\0'
size_t my_strlen2(const char* str)
{
	assert(str);
	const char* start = str;//const-qualified: str points to const char, so start must too
	//Test first, then advance. Writing while (*str++) here would also step past the '\0',
	//leaving str one position too far and making the result one too large
	while (*str != '\0')
	{
		str++;
	}
	//str now points at the '\0'; its distance from start is the number of characters
	return (size_t)(str - start);
}

Method III
Recursion, without creating a temporary variable

//my_strlen3 - length of a string, computed recursively without any local variable.
//  str    : string terminated by '\0'
//  return : 0 for the empty string, otherwise 1 plus the length of the rest of the string
size_t my_strlen3(const char* str)
{
	return (*str != '\0') ? 1 + my_strlen3(str + 1) : 0;
}

strcpy

This function receives two char * parameters and copies the second string (the source) into the first (the destination), including the terminating '\0'. Note that the array the destination pointer refers to must be large enough, and the source string must be terminated by '\0'.

Simulation Implementation of strcpy

//my_strcpy - copy the string src, including its terminating '\0', into dest.
//  dest   : destination buffer; must not be NULL and must be large enough for src
//  src    : source string terminated by '\0'; must not be NULL
//  return : dest
char* my_strcpy(char* dest, const char* src)
{
	assert(dest && src);
	char* cursor = dest;
	for (;;)
	{
		char ch = *src++;
		*cursor++ = ch;
		if (ch == '\0')//the terminator has been copied as well, so stop
			break;
	}
	return dest;
}
//Demo: print arr2, overwrite it with arr1 using my_strcpy, then print the result.
int main()
{
	//Both are writable arrays; the destination of a copy must not be a string literal
	char arr1[20] = "hello underworld";
	char arr2[20] = "hello world";
	char* copied;

	printf("%s\n", arr2);

	copied = my_strcpy(arr2, arr1);
	printf("%s\n", copied);

	return 0;
}

strcat

This function receives two char * parameters, copies the second string (up to and including its terminating '\0') starting at the position of the first string's '\0', and returns the address of the first string. Make sure the space the first pointer refers to is large enough to hold the combined string.

Simulation Implementation of strcat

//my_strcat - append the string src, including its terminating '\0', to the end of dest.
//  dest   : destination string; must not be NULL and must have room for the combined string
//  src    : string to append, terminated by '\0'; must not be NULL
//  return : dest
char* my_strcat(char* dest, const char* src)
{
	assert(dest && src);
	char* tail = dest;

	//Move to the '\0' that ends the current contents of dest
	while (*tail != '\0')
		tail++;

	//From here on it is the same as strcpy: copy up to and including the '\0' of src
	while ((*tail = *src) != '\0')
	{
		tail++;
		src++;
	}
	return dest;
}
//Demo: append arr2 to arr1 with my_strcat and print the combined string.
int main()
{
	char arr1[20] = "hello ";
	char arr2[20] = "underworld";
	char* joined = my_strcat(arr1, arr2);

	printf("%s\n", joined);

	return 0;
}

strcmp

Receives two char * parameters and compares the strings character by character; at the first pair of unequal characters it compares their character codes. If the character from the first string is larger, it returns a number greater than 0; if it is smaller, it returns a number less than 0; and if the strings are completely equal, it returns 0.

Implementation of strcmp simulation

//my_strcmp - compare two strings character by character.
//  str1, str2 : strings terminated by '\0'; must not be NULL (checked with assert)
//  return     : 0 if the strings are equal, a value < 0 if str1 sorts before str2,
//               a value > 0 if str1 sorts after str2
int my_strcmp(const char* str1, const char* str2)
{
	assert(str1 && str2);
	//Compare as unsigned char, as the standard strcmp does. With plain char (signed on
	//many platforms) a byte above 0x7F would be negative and compare as smaller than ASCII
	const unsigned char* p1 = (const unsigned char*)str1;
	const unsigned char* p2 = (const unsigned char*)str2;

	while (*p1 == *p2)
	{
		if (*p1 == '\0')//both strings reached their terminator at the same time
			return 0;
		p1++;
		p2++;
	}
	//The loop ended on the first unequal pair; the difference of the codes gives the order
	return *p1 - *p2;
}
//Demo: compare two strings that first differ at 'w' versus 'u'.
int main()
{
	char *str1 = "hello world";
	char *str2 = "hello underworld";
	int result = my_strcmp(str1, str2);//'w' has a larger code than 'u', so the result is positive

	printf("%d\n", result);

	return 0;
}

strstr

Receives two char * parameters and returns a pointer to the first occurrence of the second string within the first string, or NULL if the second string does not occur in it.

strstr Simulation Implementation

//my_strstr - find the first occurrence of the string str2 inside the string str1.
//  str1   : string to search in; must not be NULL
//  str2   : string to search for; must not be NULL
//  return : pointer to the start of the first occurrence of str2 in str1,
//           str1 itself if str2 is empty, or NULL if str2 does not occur
char* my_strstr(const char* str1, const char* str2)
{
	assert(str1 && str2);

	const char* s1;//walks through str1 during one comparison attempt
	const char* s2;//walks through str2 during one comparison attempt
	const char* cp = str1;//start position of the current comparison attempt

	if (*str2 == '\0')//special case: the empty string is found at the very beginning
		return (char*)str1;

	while (*cp)
	{
		s1 = cp;
		s2 = str2;

		//Advance both pointers while the characters agree. Testing *s1 != '\0' separately
		//is unnecessary: if s1 ends first, *s1 is '\0' while *s2 is not, so they are unequal
		while (*s2 != '\0' && *s1 == *s2)
		{
			s1++;
			s2++;
		}
		if (*s2 == '\0')//all of str2 was matched, so the occurrence starts at cp
		{
			return (char*)cp;//the cast mirrors the standard strstr, which returns char*
		}
		cp++;//mismatch: retry one position further, with s1 and s2 reset at the top of the loop
	}

	return NULL;
}

strstr using the KMP algorithm

//get_next - build the KMP "next" table for the pattern str.
//  str  : pattern string terminated by '\0'
//  next : output array with room for at least strlen(str) ints (at least 1 for an empty pattern)
//next[0] is the sentinel -1. For i > 0, next[i] is the pattern index at which matching
//resumes after a mismatch at pattern index i.
//The finished table is also printed to stdout as a debugging aid.
void get_next(char* str, int* next)
{
	int i = 0;
	int k = -1;
	int len = (int)strlen(str);
	int z;

	next[0] = -1;//sentinel: makes the search advance in the text instead of falling back again
	while (i < len - 1)//the table has the same length as the pattern, so its last index is len - 1
	{
		if (k == -1 || str[i] == str[k])
		{
			++i;
			++k;
			next[i] = k;
		}
		else
		{
			k = next[k];//fall back to a shorter candidate prefix
		}
	}

	//Test print next
	printf("next:");
	for (z = 0; z < len; z++)
	{
		printf("%d ", next[z]);
	}
	printf("\n");
}

//Index_KMP - search for the pattern str2 in the text str1 with the KMP algorithm.
//  str1   : text to search in
//  str2   : pattern to search for (at most 255 characters)
//  pos    : index in str1 at which the search starts
//  return : index in str1 of the first match at or after pos; 0 if there is no match,
//           if pos is outside [0, strlen(str1)], or if the pattern is longer than 255
//NOTE: 0 is returned both for "not found" and for a match at index 0; callers that need
//to tell the two apart have to verify the match themselves.
//The final values of i and j and the number of loop iterations are printed to stdout.
int Index_KMP(char* str1, char* str2, int pos)
{
	enum { NEXT_CAPACITY = 255 };
	int next[NEXT_CAPACITY];
	int len1 = (int)strlen(str1);
	int len2 = (int)strlen(str2);
	int i = pos;
	int j = 0;
	int count = 0;

	//Reject a start position outside the text and a pattern whose table would overflow next[]
	if (pos < 0 || pos > len1 || len2 > NEXT_CAPACITY)
		return 0;

	get_next(str2, next);

	while (i < len1 && j < len2)
	{
		count++;
		if (j == -1 || str1[i] == str2[j])//compare first, then advance both indices
		{
			++i;
			++j;
		}
		else
		{
			j = next[j];//i never moves backwards; only the pattern index falls back
		}
	}
	printf("i=%d\n", i);
	printf("j=%d\n", j);
	printf("count=%d\n", count);

	//The pattern was found only if every one of its characters was matched, i.e. j reached
	//len2. Testing j >= len2 - 1 would also accept a text that ends after matching only the
	//first len2 - 1 pattern characters. This test is correct for single-character patterns
	//too: after the match j is 1, which equals len2.
	if (j == len2)
		return i - len2;//i is one past the end of the match, so the match starts len2 earlier
	else
		return 0;
}
//Demo: locate "under" in the text once with my_strstr and once with Index_KMP,
//printing the text from the match position onwards in both cases.
int main()
{
	char* str1 = "hello underworld!";
	char* str2 = "under";

	printf("%s\n", my_strstr(str1, str2));
	//%s needs a pointer to the first character to print. Dereferencing with * would pass
	//a single char where a pointer is expected, which is undefined behavior
	printf("%s\n", str1 + Index_KMP(str1, str2, 0));

	return 0;
}

strncpy

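It takes one more parameter than strcpy: the number of characters to copy. If that number is larger than the length of str2, the rest of the destination is filled with '\0' up to that number; if it is not larger than the length of str2, no terminating '\0' is written.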

//Demo: strncpy with different counts, copying arr2 ("xxxx") over arr1 ("abcdefghi").
//  count 3 : copies "xxx" only, no '\0' is written, arr1 becomes "xxxdefghi"
//  count 4 : copies "xxxx" only, no '\0' is written, arr1 becomes "xxxxefghi"
//  count 5 : copies "xxxx" and its '\0', arr1 becomes "xxxx"
//  count 6 : arr2 is too short, so the remaining position is filled with '\0' as well
int main()
{
	char arr1[20] = "abcdefghi";
	char arr2[] = "xxxx";
	const size_t count = 5;

	strncpy(arr1, arr2, count);
	printf("%s\n", arr1);

	return 0;
}

strncat

It takes one more parameter than strcat: the maximum number of characters to append. At most the whole of str2 is appended, and a terminating '\0' is always written after the appended characters.

//Demo: strncat with different counts, appending arr2 ("def") to arr1 ("abc").
//The 'x' characters after the embedded '\0' make it visible where strncat writes.
//  count 2 : appends "de" and then a '\0', arr1 becomes "abcde"
//  count 3 : appends "def" and then a '\0', arr1 becomes "abcdef"
//  count 6 : arr2 has only 3 characters, so the result is the same as with count 3
int main()
{
	char arr1[20] = "abc\0xxxxxxx";
	char arr2[] = "def";
	const size_t count = 2;

	strncat(arr1, arr2, count);
	printf("%s\n", arr1);

	return 0;
}

strncmp

It takes one more parameter than strcmp: the maximum number of characters to compare.

//Demo: strncmp on strings that agree in their first five characters and differ at the sixth.
int main()
{
	char arr1[] = "abcdew";
	char arr2[] = "abcdeqj";
	int first_five = strncmp(arr1, arr2, 5);//"abcde" on both sides, so 0
	int first_six = strncmp(arr1, arr2, 6);//'w' versus 'q' at the sixth character, so positive

	printf("%d\n", first_five);
	printf("%d\n", first_six);

	return 0;
}

strtok

strtok is the string-splitting function. It receives two char * parameters: the first is the string to be split and the second is the set of delimiter characters, whose order does not matter. When the first parameter is not NULL, the first token of that string is returned; when the first parameter is NULL, the search continues from the position reached by the previous call and the next token is returned. strtok modifies the string it splits, which is why the first example below works on a copy.

//Demo: split an e-mail address at '@' and '.' and print each token on its own line.
int main()
{
	char arr1[] = "cjh@scu.edu";
	char arr2[100] = { 0 };//working copy; strtok is run on this instead of on arr1
	char sep[] = "@.";
	char* token;

	strcpy(arr2, arr1);

	token = strtok(arr2, sep);//first call: pass the string to be split
	while (token != NULL)
	{
		printf("%s\n", token);
		token = strtok(NULL, sep);//later calls: pass NULL to continue in the same string
	}

	return 0;
}
//Demo: split a sentence at spaces, commas, periods and hyphens and print each token.
int main()
{
	char str[] = "- This, a sample string.";
	char* pch;

	printf("Splitting string \"%s\" into tokens:\n", str);

	//The two delimiter strings list the same characters in a different order:
	//the order of the delimiters does not matter, but note that the space is one of them
	for (pch = strtok(str, ", .-"); pch != NULL; pch = strtok(NULL, " ,.-"))
	{
		printf("%s\n", pch);
	}
	return 0;
}

memcpy

Receives three parameters: the first points to the destination, the second points to the source data to be copied, and the last is a size_t giving the number of bytes to copy. Note that the standard does not define the result when the source and destination regions overlap.

memcpy Simulation Implementation

//my_memcpy - copy count bytes from src to dest, one byte at a time from front to back.
//  dest   : destination; must not be NULL and must have room for count bytes
//  src    : source; must not be NULL. It is const-qualified like the standard memcpy,
//           so read-only data can be passed without a cast
//  count  : number of bytes to copy
//  return : dest
//Not intended for overlapping regions: if dest lies inside the source region, bytes
//that have already been overwritten are copied again.
void* my_memcpy(void* dest, const void* src, size_t count)
{
	assert(dest && src);
	unsigned char* out = dest;
	const unsigned char* in = src;

	while (count--)
	{
		*out++ = *in++;
	}
	return dest;
}
//Demo: copy the 10 ints of arr1 into the first half of arr2 and print all of arr2.
int main()
{
	int arr1[10] = { 1,2,3,4,5,6,7,8,9,10 };
	int arr2[20] = { 0 };

	my_memcpy(arr2, arr1, 10 * sizeof(int));//the count is in bytes, not in elements

	int i;
	for (i = 0; i < 20; i++)
	{
		printf("%d ", arr2[i]);
	}

	return 0;
}

memmove

memmove has the same prototype as memcpy and does everything memcpy does, but it also correctly handles the case where the source and destination regions overlap, such as copying part of a buffer onto another part of the same buffer.

#include <stdio.h>
#include <string.h>
//Demo: use memmove to copy 11 bytes of a string onto an overlapping, later part of itself.
int main()
{
	char str[] = "memmove can be very useful......";
	char* from = &str[15];
	char* to = &str[20];

	printf("%c\n", *from);
	printf("%c\n", *to);

	//memmove and memcpy do not stop at '\0'; the third parameter alone decides how many
	//bytes are copied
	memmove(to, from, 11);
	puts(str);
	return 0;
}

memmove Simulation Implementation

//my_memmove - copy count bytes from src to dest; the two regions may overlap.
//  dest   : destination with room for count bytes
//  src    : source; const-qualified like the standard memmove, so read-only data can
//           be passed without a cast
//  count  : number of bytes to copy
//  return : dest
//The key is to decide, before copying, whether a front-to-back copy would overwrite
//source bytes that have not been copied yet.
void* my_memmove(void* dest, const void* src, size_t count)
{
	unsigned char* out = dest;
	const unsigned char* in = src;

	if (out <= in || out >= in + count)
	{
		//dest starts at or before src, or after the end of the source region:
		//copying front to back never overwrites a byte that is still to be read
		while (count--)
		{
			*out++ = *in++;
		}
	}
	else
	{
		//dest starts inside the source region: copy back to front so that the
		//overlapping bytes are read before they are overwritten
		out += count;
		in += count;
		while (count--)
		{
			*--out = *--in;
		}
	}
	return dest;
}
//Demo: move the first four ints of arr two positions to the right within the same array.
//  with my_memmove the overlap is handled and the result is 1 2 1 2 3 4 7 8 9 10
//  with my_memcpy (front-to-back copy) the result would be 1 2 1 2 1 2 7 8 9 10
int main()
{
	int arr[10] = { 1,2,3,4,5,6,7,8,9,10 };
	size_t idx;

	my_memmove(&arr[2], &arr[0], 4 * sizeof arr[0]);

	for (idx = 0; idx < sizeof arr / sizeof arr[0]; idx++)
	{
		printf("%d ", arr[idx]);
	}
	return 0;
}

memcmp

Receives three parameters: the first two are of type void * and point to the two blocks of memory to be compared, and the last one is a size_t giving the number of bytes to compare.

#include <stdio.h>
#include <string.h>
//Demo: compare two buffers byte by byte with memcmp and report which one is greater.
int main()
{
	char buffer1[] = "DWgaOtP12df0";
	char buffer2[] = "DWGAOTP12DF0";
	int n = memcmp(buffer1, buffer2, sizeof(buffer1));//compares the whole array, '\0' included

	if (n > 0)
	{
		printf("'%s' is greater than '%s'.\n", buffer1, buffer2);
	}
	else if (n < 0)
	{
		printf("'%s' is less than '%s'.\n", buffer1, buffer2);
	}
	else
	{
		printf("'%s' is the same as '%s'.\n", buffer1, buffer2);
	}
	return 0;
}

Character classification function

Each function returns true (a nonzero value) if its argument meets the following condition

  • iscntrl any control character
  • isspace white-space characters: space ' ', form feed '\f', line feed '\n', carriage return '\r', tab '\t' or vertical tab '\v'
  • isdigit decimal digits 0 ~ 9
  • isxdigit hexadecimal digit, including all decimal digits, lowercase a~f and uppercase A~F
  • islower small letter a~z
  • isupper capital letters A~Z
  • isalpha letter a~z or A~Z
  • isalnum letter or number, a~z, A~Z, 0~9
  • ispunct punctuation mark, any graphic character not belonging to numbers or letters (printable)
  • isgraph any graphic character
  • isprint any printable character, including graphic characters and the space character

Character conversion functions

  • tolower()
  • toupper()
#include <ctype.h>
#include <stdio.h>
//Demo: print a string with every uppercase letter converted to lowercase.
//isupper and tolower are declared in <ctype.h>, which must be included to use them.
int main()
{
	char str[] = "Test String.\n";
	int i;

	for (i = 0; str[i] != '\0'; i++)
	{
		//The <ctype.h> functions require a value representable as unsigned char (or EOF);
		//a plain char may be negative, so convert it before the call
		unsigned char c = (unsigned char)str[i];
		if (isupper(c))
			c = (unsigned char)tolower(c);
		putchar(c);
	}
	return 0;
}

Topics: C