preface
Happy new year. Welcome to the last experiment in this book.
For the download of Lab 8 (Proxy Lab) from "Computer Systems: A Programmer's Perspective" and a machine translation of the official lab handout, please see:
https://blog.csdn.net/weixin_43362650/article/details/122770330
I think this document is very helpful to the whole experiment.
Experimental task
Write a simple HTTP proxy server
A proxy server acts on behalf of network users to fetch information from the network; figuratively speaking, it is a transit station for network traffic. Normally, when a web browser connects directly to an Internet site, it sends a request and the site sends the response straight back. A proxy server sits between the browser and the web server: instead of retrieving the page from the web server itself, the browser sends its request to the proxy server, which retrieves the requested content and returns it to the browser. (Source: Baidu Encyclopedia)
The relationship between the original client and the server.
After adding the proxy
The proxy server receives the request sent by the client, processes it, and forwards it to the server; it then relays the server's response back to the client.
The proxy server of this experiment is divided into three stages
- Part I: implementation of sequential web proxy
- Part II: processing multiple concurrent requests
- Part 3: caching web objects
Part I: implementation of sequential web proxy
The code is as follows (see the comments for details)
#include <stdio.h> #include "csapp.h" /* Recommended max cache and object sizes */ #define MAX_CACHE_SIZE 1049000 #define MAX_OBJECT_SIZE 102400 /* You won't lose style points for including this long line in your code */ static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n"; void doit(int clientfd); int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head); void read_requesthdrs(rio_t *rp,int fd); void return_content(int serverfd, int clientfd); int main(int argc,char **argv) { int listenfd,connfd; char hostname[MAXLINE],port[MAXLINE]; socklen_t clientlen; struct sockaddr_storage clientaddr; if(argc != 2){ fprintf(stderr, "usage: %s <port>\n", argv[0]); exit(1); } /* The agent creates a listening descriptor and is ready to receive the connection request */ listenfd = Open_listenfd(argv[1]); while(1){ clientlen = sizeof(clientaddr); /* Waiting for the connection request from the client to the listening descriptor listenfd, Then fill in the socket address of the client in addr and return a connected descriptor */ connfd = Accept(listenfd,(SA *)&clientaddr,&clientlen); /* Convert the socket address structure clientaddr into the corresponding host and service name string, And copy them to the hostname and port buffers */ Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0); printf("Accepted connection from (%s, %s)\n",hostname,port); doit(connfd); Close(connfd); } } /* Handle client HTTP transactions */ void doit(int clientfd){ char buf[MAXLINE],method[MAXLINE],uri[MAXLINE],version[MAXLINE]; char hostname[MAXLINE],path[MAXLINE],port[MAXLINE],request_head[MAXLINE]; int serverfd; rio_t rio; /* Read request line and request header */ Rio_readinitb(&rio,clientfd); Rio_readlineb(&rio,buf,MAXLINE); sscanf(buf,"%s %s %s",method,uri,version); if(strcasecmp(method,"GET")){ printf("Not implemented"); return; } /* Parse the uri to get hostname, path and port. 
Generate request_head */ parse_uri(uri,hostname,path,port,request_head); /* Establish a connection to the server */ serverfd = Open_clientfd(hostname,port); /* Pass the request header to the server */ Rio_writen(serverfd,request_head,strlen(request_head)); read_requesthdrs(&rio,serverfd); /* Return the data read by the server to the client */ return_content(serverfd,clientfd); } /* Resolve the hostname and path and port in the uri. And generate a request_head * uri example: http://www.cmu.edu:8080/hub/index.html * hostname:www.cmu.edu * path:/hub/index.html * port:8080 */ int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head){ sprintf(port,"80"); //Default value char *end,*bp; char *tail = uri+strlen(uri); //The last character of the uri, not '\ 0'. char *bg = strstr(uri,"//"); bg = (bg!=NULL ? bg+2 : uri); //Start with hostname. end = bg; //Take the ending hostname. while(*end != '/' && *end != ':') end++; strncpy(hostname,bg,end-bg); bp = end + 1; //Start with port if(*end == ':'){ //==':' Description: port in uri end++; bp = strstr(bg,"/"); //Take the end of port strncpy(port,end,bp-end); end = bp; //Get the beginning of uri } strncpy(path,end,(int)(tail-end)+1); /* The first line of the request: get / hub / index html HTTP/1.0. 
*/ sprintf(request_head,"GET %s HTTP/1.0\r\nHost: %s\r\n",path,hostname); return 1; } /* * Read HTTP request header * Host,User-Agent,Connection And proxy connection * Keep other headers */ void read_requesthdrs(rio_t *rp,int fd){ char buf[MAXLINE]; sprintf(buf, "%s", user_agent_hdr); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Connection: close\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Proxy-Connection: close\r\n"); Rio_writen(fd, buf, strlen(buf)); /* Keep other headers */ for(Rio_readlineb(rp,buf,MAXLINE);strcmp(buf,"\r\n");Rio_readlineb(rp,buf,MAXLINE)){ if(strncmp("Host",buf,4) == 0 || strncmp("User-Agent",buf,10) == 0 || strncmp("Connection",buf,10) == 0 || strncmp("Proxy-Connection",buf,16) == 0) continue; printf("%s",buf); Rio_writen(fd,buf,strlen(buf)); } Rio_writen(fd,buf,strlen(buf)); return; } /* * Return the data read by the server to the client */ void return_content(int serverfd, int clientfd){ size_t n; char buf[MAXLINE]; rio_t srio; Rio_readinitb(&srio,serverfd); while((n = Rio_readlineb(&srio,buf,MAXLINE)) != 0){ Rio_writen(clientfd,buf,n); } }
compile
Some environments may be missing tools that the tests need. For example, the first time I ran them, "curl" was not installed.
linux> apt-get install curl
function
You can see that the first part has been completed.
Part II: processing multiple concurrent requests
Following Section 12.3.8 of CS:APP3e, turn the Part I code into a thread-based concurrent server (modify the main function and add a thread routine).
#include <stdio.h> #include "csapp.h" /* Recommended max cache and object sizes */ #define MAX_CACHE_SIZE 1049000 #define MAX_OBJECT_SIZE 102400 /* You won't lose style points for including this long line in your code */ static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n"; void doit(int clientfd); int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head); void read_requesthdrs(rio_t *rp,int fd); void return_content(int serverfd, int clientfd); void *thread(void *vargp); int main(int argc,char **argv) { int listenfd; int *connfd; /* Use pointers to avoid competition */ char hostname[MAXLINE],port[MAXLINE]; socklen_t clientlen; struct sockaddr_storage clientaddr; pthread_t tid; if(argc != 2){ fprintf(stderr, "usage: %s <port>\n", argv[0]); exit(1); } /* The agent creates a listening descriptor and is ready to receive the connection request */ listenfd = Open_listenfd(argv[1]); while(1){ clientlen = sizeof(clientaddr); connfd = Malloc(sizeof(int)); /* Waiting for the connection request from the client to the listening descriptor listenfd, Then fill in the socket address of the client in addr and return a connected descriptor */ *connfd = Accept(listenfd,(SA *)&clientaddr,&clientlen); /* Convert the socket address structure clientaddr into the corresponding host and service name string, And copy them to the hostname and port buffers */ Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0); printf("Accepted connection from (%s, %s)\n",hostname,port); /* Call pthread_create function to create other threads */ Pthread_create(&tid,NULL,thread,connfd); } } void *thread(void *vargp){ int connfd = *((int *)vargp); Pthread_detach(pthread_self()); Free(vargp); doit(connfd); Close(connfd); return NULL; } /* Handle client HTTP transactions */ void doit(int clientfd){ char buf[MAXLINE],method[MAXLINE],uri[MAXLINE],version[MAXLINE]; char 
hostname[MAXLINE],path[MAXLINE],port[MAXLINE],request_head[MAXLINE]; int serverfd; rio_t rio; /* Read request line and request header */ Rio_readinitb(&rio,clientfd); Rio_readlineb(&rio,buf,MAXLINE); sscanf(buf,"%s %s %s",method,uri,version); if(strcasecmp(method,"GET")){ printf("Not implemented"); return; } /* Parse the uri to get hostname, path and port. Generate request_head */ parse_uri(uri,hostname,path,port,request_head); /* Establish a connection to the server */ serverfd = Open_clientfd(hostname,port); /* Pass the request header to the server */ Rio_writen(serverfd,request_head,strlen(request_head)); read_requesthdrs(&rio,serverfd); /* Return the data read by the server to the client */ return_content(serverfd,clientfd); } /* Resolve the hostname and path and port in the uri. And generate a request_head * uri example: http://www.cmu.edu:8080/hub/index.html * hostname:www.cmu.edu * path:/hub/index.html * port:8080 */ int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head){ sprintf(port,"80"); //Default value char *end,*bp; char *tail = uri+strlen(uri); //The last character of the uri, not '\ 0'. char *bg = strstr(uri,"//"); bg = (bg!=NULL ? bg+2 : uri); //Start with hostname. end = bg; //End with hostname. while(*end != '/' && *end != ':') end++; strncpy(hostname,bg,end-bg); bp = end + 1; //Start with port if(*end == ':'){ //==':' Description: port in uri end++; bp = strstr(bg,"/"); //Take the end of port strncpy(port,end,bp-end); end = bp; //Get the beginning of uri } strncpy(path,end,(int)(tail-end)+1); /* The first line of the request: get / hub / index html HTTP/1.0. 
*/ sprintf(request_head,"GET %s HTTP/1.0\r\nHost: %s\r\n",path,hostname); return 1; } /* * Read HTTP request header * Host,User-Agent,Connection And proxy connection * Keep other headers */ void read_requesthdrs(rio_t *rp,int fd){ char buf[MAXLINE]; sprintf(buf, "%s", user_agent_hdr); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Connection: close\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Proxy-Connection: close\r\n"); Rio_writen(fd, buf, strlen(buf)); /* Keep other headers */ for(Rio_readlineb(rp,buf,MAXLINE);strcmp(buf,"\r\n");Rio_readlineb(rp,buf,MAXLINE)){ if(strncmp("Host",buf,4) == 0 || strncmp("User-Agent",buf,10) == 0 || strncmp("Connection",buf,10) == 0 || strncmp("Proxy-Connection",buf,16) == 0) continue; printf("%s",buf); Rio_writen(fd,buf,strlen(buf)); } Rio_writen(fd,buf,strlen(buf)); return; } /* * Return the data read by the server to the client */ void return_content(int serverfd, int clientfd){ size_t n; char buf[MAXLINE]; rio_t srio; Rio_readinitb(&srio,serverfd); while((n = Rio_readlineb(&srio,buf,MAXLINE)) != 0){ Rio_writen(clientfd,buf,n); } }
function
You can see that the second part has been completed
Part 3: caching web objects
The cache is protected by a readers-preferred reader-writer lock; the code is as follows
#include <stdio.h> #include "csapp.h" /* Recommended max cache and object sizes */ #define MAX_CACHE_SIZE 1049000 #define MAX_OBJECT_SIZE 102400 #define MAX_CACHE 10 /* You won't lose style points for including this long line in your code */ static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n"; void doit(int clientfd); int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head); void read_requesthdrs(rio_t *rp,int fd); void return_content(int serverfd, int clientfd,char *url); void *thread(void *vargp); int maxlrucache(); /*Structure of reader writer lock*/ struct RWLOCK_T{ sem_t lock; //Basic lock sem_t writeLock; //It says lock int readcnt; //Number of readers }; /*LRU Cached structure*/ struct CACHE{ int lruNumber; //Number of references, ranked by size. Large indicates that it has been quoted recently char url[MAXLINE]; //Uniquely identify the corresponding content through the url char content[MAX_OBJECT_SIZE]; }; struct CACHE cache[MAX_CACHE]; //Cache, MAX_CACHE struct RWLOCK_T* rw; //Reader writer lock pointer void rwlock_init(); //Initialize reader writer lock pointer char *readcache(char *url); //Read cache void writecache(char *buf,char *url); //Write cache int main(int argc,char **argv) { int listenfd; int *connfd; /* Use pointers to avoid competition */ char hostname[MAXLINE],port[MAXLINE]; socklen_t clientlen; struct sockaddr_storage clientaddr; pthread_t tid; if(argc != 2){ fprintf(stderr, "usage: %s <port>\n", argv[0]); exit(1); } rw = Malloc(sizeof(struct RWLOCK_T)); rwlock_init(); /* The agent creates a listening descriptor and is ready to receive the connection request */ listenfd = Open_listenfd(argv[1]); while(1){ clientlen = sizeof(clientaddr); connfd = Malloc(sizeof(int)); /* Waiting for the connection request from the client to the listening descriptor listenfd, Then fill in the socket address of the client in addr and return a connected descriptor 
*/ *connfd = Accept(listenfd,(SA *)&clientaddr,&clientlen); /* Convert the socket address structure clientaddr into the corresponding host and service name string, And copy them to the hostname and port buffers */ Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0); printf("Accepted connection from (%s, %s)\n",hostname,port); /* Call pthread_create function to create other threads */ Pthread_create(&tid,NULL,thread,connfd); } } void *thread(void *vargp){ int connfd = *((int *)vargp); Pthread_detach(pthread_self()); Free(vargp); doit(connfd); Close(connfd); return NULL; } /* Handle client HTTP transactions */ void doit(int clientfd){ char buf[MAXLINE],method[MAXLINE],uri[MAXLINE],version[MAXLINE]; char hostname[MAXLINE],path[MAXLINE],port[MAXLINE],request_head[MAXLINE]; int serverfd; rio_t rio; /* Read request line and request header */ Rio_readinitb(&rio,clientfd); Rio_readlineb(&rio,buf,MAXLINE); sscanf(buf,"%s %s %s",method,uri,version); if(strcasecmp(method,"GET")){ printf("Not implemented"); return; } char *content = readcache(uri); if(content != NULL){ Rio_writen(clientfd,content,strlen(content)); free(content); }else{ /* Resolve uri and port. Generate request_head */ parse_uri(uri,hostname,path,port,request_head); /* Establish a connection to the server */ serverfd = Open_clientfd(hostname,port); /* Pass the request header to the server */ Rio_writen(serverfd,request_head,strlen(request_head)); read_requesthdrs(&rio,serverfd); /* Return the data read by the server to the client */ return_content(serverfd,clientfd,uri); } } /* Resolve the hostname and path and port in the uri. And generate a request_head * uri example: http://www.cmu.edu:8080/hub/index.html * hostname:www.cmu.edu * path:/hub/index.html * port:8080 */ int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head){ sprintf(port,"80"); //Default value char *end,*bp; char *tail = uri+strlen(uri); //The last character of the uri, not '\ 0'. 
char *bg = strstr(uri,"//"); bg = (bg!=NULL ? bg+2 : uri); //Start with hostname. end = bg; //End with hostname. while(*end != '/' && *end != ':') end++; strncpy(hostname,bg,end-bg); bp = end + 1; //Start with port if(*end == ':'){ //==':' Description: port in uri end++; bp = strstr(bg,"/"); //Take the end of port strncpy(port,end,bp-end); end = bp; //Get the beginning of uri } strncpy(path,end,(int)(tail-end)+1); /* The first line of the request: get / hub / index html HTTP/1.0. */ sprintf(request_head,"GET %s HTTP/1.0\r\nHost: %s\r\n",path,hostname); return 1; } /* * Read HTTP request header * Host,User-Agent,Connection And proxy connection * Keep other headers */ void read_requesthdrs(rio_t *rp,int fd){ char buf[MAXLINE]; sprintf(buf, "%s", user_agent_hdr); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Connection: close\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Proxy-Connection: close\r\n"); Rio_writen(fd, buf, strlen(buf)); /* Keep other headers */ for(Rio_readlineb(rp,buf,MAXLINE);strcmp(buf,"\r\n");Rio_readlineb(rp,buf,MAXLINE)){ if(strncmp("Host",buf,4) == 0 || strncmp("User-Agent",buf,10) == 0 || strncmp("Connection",buf,10) == 0 || strncmp("Proxy-Connection",buf,16) == 0) continue; printf("%s",buf); Rio_writen(fd,buf,strlen(buf)); } Rio_writen(fd,buf,strlen(buf)); return; } /* * Return the data read by the server to the client */ void return_content(int serverfd, int clientfd,char *uri){ size_t n,size = 0; char buf[MAXLINE],content[MAX_OBJECT_SIZE]; rio_t srio; Rio_readinitb(&srio,serverfd); while((n = Rio_readlineb(&srio,buf,MAXLINE)) != 0){ Rio_writen(clientfd,buf,n); if(n + size <= MAX_OBJECT_SIZE){ sprintf(content + size,"%s",buf); size += n; }else{ size = MAX_OBJECT_SIZE + 1; } } writecache(content,uri); } /*-----cache start-----*/ void rwlock_init(){ rw->readcnt = 0; sem_init(&rw->lock,0,1); sem_init(&rw->writeLock,0,1); } void writecache(char *buf,char *url){ sem_wait(&rw->writeLock); //Waiting for writer lock int index; /*See if the 
cache is empty*/ for(index = 0;index < MAX_CACHE;index++){ if(cache[index].lruNumber == 0){ break; } } /*There are no vacant seats and they will be expelled according to LRU policy*/ if(index == MAX_CACHE){ int minlru = cache[0].lruNumber; /*Find the last accessed cache*/ for(int i = 1;i < MAX_CACHE;i++){ if(cache[i].lruNumber < minlru){ minlru = cache[i].lruNumber; index = i; } } } cache[index].lruNumber = maxlrucache()+1; strcpy(cache[index].url,url); strcpy(cache[index].content,buf); sem_post(&rw->writeLock); //Release lock return; } char *readcache(char *url){ sem_wait(&rw->lock); //The reader waits and acquires the lock if(rw->readcnt == 1) sem_wait(&rw->writeLock); //The reader is reading and no writer is allowed rw->readcnt++; sem_post(&rw->lock); //Release lock char *content = NULL; for(int i = 0;i < MAX_CACHE;i++){ /*The corresponding cache was found*/ if(strcmp(url,cache[i].url) == 0){ content = (char *)Malloc(strlen(cache[i].content)); strcpy(content,cache[i].content); int maxlru = maxlrucache(); //Get the maximum lru cache[i].lruNumber = maxlru+1; //+10% of the largest lru break; } } sem_wait(&rw->lock); //Wait and acquire lock rw->readcnt--; if(rw->readcnt == 0) //There are no readers. Release the writer lock sem_post(&rw->writeLock); sem_post(&rw->lock); //Release lock return content; } int maxlrucache(){ int i; int max=0; for(i = 0;i<MAX_CACHE;i++){ if(cache[i].lruNumber > max){ max = cache[i].lruNumber; } } return max; } /*-----cache end-----*/
Operation results
You can see that the third part has been completed
epilogue
The grading for this lab is not very strict: full marks can be obtained by implementing just the basic functionality. For example, even if I delete the locking code, the proxy still gets full marks. The code above is therefore not guaranteed to be strictly correct.