Heibanke (Blackboard Class) crawler challenge - level five

Posted by chefmars on Sun, 01 Dec 2019 23:54:47 +0100

This is the fifth level of the Heibanke (Blackboard Class) crawler challenge.

image.png

As in the previous levels, you need to log in first.

image.png

This level builds on the previous ones. It is not as complicated as the fourth level, but it adds a verification code (captcha) step, which is quite hard. It took me a lot of effort to install the relevant Python packages and configure the environment; the two main libraries are pytesseract and Image (from PIL). I don't know much about them (see the linked reference), and I don't know much about captcha recognition either, so I simply called a method I found on the Internet. Everything else is almost the same as before.

Streamlined process

  1. Log in to the site
  2. Recognize the verification code (captcha)
  3. Try a password
    Then just repeat the last two steps until the password is found.
    Several versions are provided below for your reference.

one.py (in principle the code logic is fine, but it is extremely slow and the captcha recognition is not very accurate, so the final output is sometimes wrong. Feel free to try it and help me find the mistake.)

import re
import requests
from lxml import etree
import pytesseract
from PIL import Image,ImageEnhance

def verification_Code(img_url):
    """Download a captcha image and return the text Tesseract reads from it.

    The image is fetched from ``img_url`` and written to ``1.jpg`` in the
    current working directory. It is then converted to greyscale, its
    contrast is doubled, and the enhanced picture is saved back over
    ``1.jpg`` before being handed to ``pytesseract``.

    Args:
        img_url: Absolute URL of the captcha image.

    Returns:
        The recognised text with leading/trailing whitespace removed.
    """
    # Save verification code
    imgs = requests.get(img_url).content
    with open('1.jpg', 'wb') as f:
        f.write(imgs)

    # Close the source file before 1.jpg is overwritten below.
    with Image.open('1.jpg') as image:
        imgry = image.convert('L')  # greyscale conversion
    sharpness = ImageEnhance.Contrast(imgry)  # Contrast enhancement
    sharp_img = sharpness.enhance(2.0)
    sharp_img.save('1.jpg')

    # OCR the enhanced image; passing the unprocessed original here would
    # throw away all of the preprocessing done above.
    text = pytesseract.image_to_string(sharp_img)
    # Drop surrounding whitespace so it is not submitted as part of the
    # captcha answer.
    return text.strip()



def login():
    """Sign in to heibanke.com and return the session used to do so.

    Returns:
        The ``requests.Session`` on which the login form was fetched and
        posted; it carries whatever cookies the server set.
    """
    login_url = "http://www.heibanke.com/accounts/login"
    session = requests.Session()

    # Fetch the login page first: its ``csrftoken`` cookie has to be sent
    # back in the form as ``csrfmiddlewaretoken``.
    login_page = session.get(login_url)
    form = dict(
        username='Koelre',
        password='lixue961314',
        csrfmiddlewaretoken=login_page.cookies['csrftoken'],
    )
    session.post(login_url, form)

    # NOTE: the POST response is not inspected, so this message is printed
    # unconditionally.
    print("Login successfully")
    return session

def ex05(a=1, password=1):
    """Brute-force the numeric password of the captcha-protected exercise.

    Logs in once, then loops: fetch the exercise page, read the CSRF token
    and captcha from the form, OCR the captcha, and submit a guess. A
    rejected captcha retries the same password; a rejected password moves on
    to ``password + 1``. The loop ends when the response contains neither
    error message, at which point the password and the page's ``<h3>``
    contents are printed.

    The retries are a plain loop on a single session rather than recursive
    calls, so a long run of attempts cannot exhaust the recursion limit and
    the login request is not repeated for every guess.

    Args:
        a: Starting value of the captcha-retry counter. It is incremented
            after each rejected captcha and is otherwise unused.
        password: First password to try.
    """
    url = "http://www.heibanke.com/lesson/crawler_ex04/"
    session = login()

    while True:
        html = session.get(url).text
        etr = etree.HTML(html)
        token = etr.xpath('/html/body/div/div/div[2]/form/input/@value')[0].strip()
        img_src = etr.xpath('/html/body/div/div/div[2]/form/div[3]/img/@src')[0].strip()
        # Verification code connection
        img_url = 'http://www.heibanke.com' + str(img_src)
        # Picture code: the hidden captcha key that must accompany the answer
        pic_code = etr.xpath('//*[@id="id_captcha_0"]/@value')[0]
        text = verification_Code(img_url)

        data = {
            "csrfmiddlewaretoken": token,
            "username": "a",
            "password": password,
            "captcha_0": pic_code,
            "captcha_1": text
        }

        res = session.post(url, data).text
        h3 = re.findall('<h3>(.*?)</h3>', res)

        if "Verification code input error" in res:
            # Captcha misread: try the same password with a fresh captcha.
            print(h3)
            print(text)
            print("retry")
            a += 1
            continue

        if 'The password you entered is wrong' in res:
            print(h3)
            print("Password:%s error" % password)
            password += 1
            continue

        print("The password is:%s" % password)
        print(h3)
        return



# When run as a script, start the brute force with the default arguments
# (retry counter 1, first password 1).
if __name__ == '__main__':
    ex05()

two.py

"""
//Blackboard guest, reptile, the fifth hurdle
http://www.heibanke.com/lesson/crawler_ex04
//Verification code processing
answer is 22
"""

import Image

from PIL import Image
from io import BytesIO
import pytesseract
import bs4
from bs4 import BeautifulSoup
import requests
import os
import re

# Location of the Tesseract executable on the author's machine; adjust for
# your own installation.
pytesseract.pytesseract.tesseract_cmd = "D:\\Program Files (x86)\\Tesseract-OCR\\tesseract"

url = "http://www.heibanke.com/lesson/crawler_ex04/"
login_url = "http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/"

data = {'username': 'medyg', 'password': '19931122bihu', 'csrfmiddlewaretoken': ''}

# Open the login page to obtain the csrftoken cookie.
loginr = requests.get(login_url)
if loginr.status_code != 200:
    print("get login_url failed")
    # Nothing below can work without the cookie; stop here instead of
    # failing later with a NameError on `cookie`.
    raise SystemExit(1)
cookie = loginr.cookies
print("get login_url success, csrftoken is :" + cookie['csrftoken'])
data['csrfmiddlewaretoken'] = cookie['csrftoken']

# Log in. Redirects are not followed so that the 302 response (and the
# cookies it sets) can be inspected directly.
signinr = requests.post(login_url, data=data, allow_redirects=False, cookies=cookie)
if signinr.status_code != 302:
    print("post login_url failed, status_code is " + str(signinr.status_code))
    # Same reasoning as above: `cookie2` would be undefined.
    raise SystemExit(1)
cookie2 = signinr.cookies
print("post login_url success, csrftoken is :" + cookie2['csrftoken'])

data['csrfmiddlewaretoken'] = cookie2['csrftoken']

# Obtain and identify the verification code (using Tesseract-OCR).
guesses = 0        # number of OCR attempts made so far
guess_success = 0  # number of OCR results the server accepted
def get_captcha():
    """Fetch the exercise page, OCR its captcha and return a submittable pair.

    Keeps fetching a fresh page and captcha until the image downloads
    successfully and Tesseract yields exactly four ASCII letters. Retries
    are done in a loop, so a long streak of bad recognitions cannot hit the
    recursion limit.

    Side effects:
        Increments the module-level ``guesses`` counter once per OCR attempt
        and stores the latest captcha key in ``data['captcha_0']``.

    Returns:
        A tuple ``(captcha_0_value, captcha_1)``: the hidden captcha key
        from the form and the recognised four-letter text.
    """
    global guesses
    while True:
        print("\n Start getting the%d Secondary verification code" % guesses)
        captchar = requests.get(url, cookies=cookie2)
        soup = BeautifulSoup(captchar.text, "lxml")
        img_src = soup.find('img', class_='captcha').get('src')
        img_url = "http://www.heibanke.com" + img_src
        captcha_0_value = soup.find('input', id="id_captcha_0").get('value')
        data['captcha_0'] = captcha_0_value

        imgr = requests.get(img_url)
        if imgr.status_code != 200:
            print("Failed to get the captcha picture, get it again")
            continue
        print("Verification code picture obtained successfully")
        captcha_img = Image.open(BytesIO(imgr.content))  # content is bytes

        print("Identifying")
        captcha_1 = pytesseract.image_to_string(captcha_img)
        captcha_1 = captcha_1.strip().replace(' ', '')
        guesses += 1

        # Letters only. A class written as `[A-Z | a-z]` would also accept
        # a space and '|', which Tesseract often emits for 'I' or 'l'.
        if re.match(r'^[A-Za-z]{4}$', captcha_1):
            print("Verification code identification succeeded:" + captcha_1)
            return captcha_0_value, captcha_1
        print("Verification code identification failed:" + captcha_1)
# Guess the password: try 0, 1, 2, ... until the page no longer reports an
# error, printing the captcha recognition rate along the way.
pw = 0
while True:
    captcha_0_value, captcha_1 = get_captcha()
    guess_data = dict(
        username='medyg',
        password=pw,
        csrfmiddlewaretoken=cookie2['csrftoken'],
        captcha_0=captcha_0_value,
        captcha_1=captcha_1,
    )
    print(guess_data)
    guessr = requests.post(url, data=guess_data, cookies=cookie2)

    if guessr.status_code != 200:
        print("Request failed, re request%d" % guessr.status_code)
        continue

    heading = BeautifulSoup(guessr.text, 'lxml').find('h3').text

    if 'Verification code input error' in heading:
        # Captcha rejected: same password again with a new captcha.
        rate = float(guess_success) / guesses
        print("If the verification code is wrong, re-enter the verification code. The recognition rate of the verification code is%f" % rate)
        continue

    # From here on the captcha was accepted by the server.
    guess_success += 1
    rate = float(guess_success) / guesses

    if 'Password error' in heading:
        print("Password error, re-enter the password, the verification code recognition rate is%f" % rate)
        pw += 1
        continue

    print(heading)
    print("The password is%d,The verification code identification rate is%f" % (pw, rate))
    break

three.py

import requests
from bs4 import BeautifulSoup
import urllib.request
from PIL import Image
import pytesseract
import re
import os

URL = 'http://www.heibanke.com/lesson/crawler_ex04/'
LOGIN_URL = 'http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/'

# Fetch the login page first; its csrftoken cookie is sent back both as a
# cookie and as the csrfmiddlewaretoken form field.
login_page = requests.get(LOGIN_URL)
login_data = dict(
    csrfmiddlewaretoken=login_page.cookies['csrftoken'],
    username='fuyufjh',
    password='142857',
)

# Redirects are not followed, so login_res.cookies holds the cookies set by
# the login response itself; they are reused for every later request.
login_res = requests.post(
    LOGIN_URL,
    data=login_data,
    cookies=login_page.cookies,
    allow_redirects=False,
)

# Password candidate currently being tried.
number = 0

# Main guessing loop: fetch a fresh form and captcha, OCR it, submit the
# current candidate password, and advance only when the captcha was accepted
# but the password was not.
while True:
    prob_res = requests.get(URL, cookies=login_res.cookies)
    soup = BeautifulSoup(prob_res.text, 'lxml')
    captcha_id = soup.find(id='id_captcha_0')['value']
    captcha_image_url = 'http://www.heibanke.com' + soup.find(alt='captcha')['src']
    try:
        urllib.request.urlretrieve(captcha_image_url, 'captcha.png')
        # Close the image before the file is deleted below.
        with Image.open('captcha.png') as vcode_img:
            # Strip the trailing newline / form feed Tesseract may append,
            # so it neither defeats the regex check nor gets posted as part
            # of the answer.
            vcode = pytesseract.image_to_string(vcode_img, lang='eng').strip()
    finally:
        # The download may have failed before the file existed; removing a
        # missing file here would mask the original exception.
        if os.path.exists('captcha.png'):
            os.remove('captcha.png')
    if not re.match(r'[A-Z]{4}$', vcode):
        print('recognizing failed')
        continue
    data = {
        'username': 'fuyufjh',
        'password': number,
        'captcha_0': captcha_id,
        'captcha_1': vcode,
        'csrfmiddlewaretoken': prob_res.cookies['csrftoken']
    }

    print(data)
    guess_res = requests.post(URL, data=data, cookies=login_res.cookies)

    if 'Verification code input error' in guess_res.text:
        # Captcha rejected: retry the same password with a new captcha.
        print('verify code error')
        continue
    elif 'Password error' in guess_res.text:
        print('Password is not %d' % number)
        number += 1
    else:
        print('Password is %d' % number)
        break
  • Refer to my GitHub for more code details

Topics: Session Python github