python crawler (crawling pictures)

Posted by on Mon, 11 Nov 2019 22:55:35 +0100

python crawler picture

Pictures of school flowers

First step

Load the modules the crawler needs

# Load the modules the crawler needs
import  re                  # standard-library regular expressions
import  requests               # third-party HTTP client library

The second step

Request the site's address and get everything in its web page, i.e. what you would see by pressing F12

# Step 2: request the page and keep its source (what F12 shows) as text.
import re
import requests

response = requests.get(url=f'')
data = response.text  # the page source, held as a single string

The third step

From the school flower net page, get the links of the pictures

# Step 3: pull the picture links out of the downloaded page.
import re
import requests

response = requests.get(url=f'')
data = response.text  # the page source, held as a single string

# The capture group (.*?) is the part of each match that findall returns.
one_list = re.findall('" src="(.*?)" /></a>', data)
for index, link in enumerate(one_list):
    # Links starting with '/d' are rewritten in place; the f-string is where
    # a prefix for such links would go (it adds nothing as written here).
    if link.startswith('/d'):
        one_list[index] = f'{link}'

The fourth step

Create a file and save it

# Step 4: download every picture found on the page and save it to disk.
import re
import requests

# NOTE: the page address is blank in this listing; fill it in before running.
response = requests.get(f'')
data = response.text  # the page source, held as a single string

# The capture group (.*?) is the part of each match that findall returns.
one_list = re.findall('" src="(.*?)" /></a>', data)
for v, link in enumerate(one_list):
    if link.startswith('/d'):
        # NOTE(review): presumably meant to turn site-relative links into
        # full ones, but no prefix is present, so the link is unchanged.
        one_list[v] = f'{link}'

for x in one_list:
    name = x.split('/')[-1]       # last path segment becomes the file name
    dd = requests.get(x).content  # raw bytes of the picture
    # Backslashes are escaped so the path is exactly D:\picture\<name>.
    # open() does not create the D:\picture directory; it must already exist.
    with open(f'D:\\picture\\{name}', 'wb') as fw:
        fw.write(dd)              # store the downloaded bytes in the file

The fifth step

Go through every following page of the school flower net, get all of its pictures, and print the result every time

# Step 5: walk through the list pages, saving every picture and counting them.
import re
import requests

num = 0  # running count of pictures saved so far
for url_name in range(44):
    # NOTE: only the page number and '.html' are present in this address;
    # the site part is blank in this listing and must be filled in.
    response = requests.get(f'{url_name}.html')
    data = response.text  # the page source, held as a single string

    # The capture group (.*?) is the part of each match that findall returns.
    one_list = re.findall('" src="(.*?)" /></a>', data)
    for v, link in enumerate(one_list):
        if link.startswith('/d'):
            # NOTE(review): presumably meant to turn site-relative links into
            # full ones, but no prefix is present, so the link is unchanged.
            one_list[v] = f'{link}'

    for x in one_list:
        name = x.split('/')[-1]       # last path segment becomes the file name
        dd = requests.get(x).content  # raw bytes of the picture
        # Backslashes are escaped so the path is exactly D:\picture\<name>.
        # open() does not create the D:\picture directory; it must exist.
        with open(f'D:\\picture\\{name}', 'wb') as fw:
            fw.write(dd)              # store the downloaded bytes in the file
        num += 1
        print(f'Saved picture {num}: {name}')  # report progress every time

Topics: Python