百度ocr 识别验证码

Posted by tzwlwy's Blog on April 15, 2020

截图获取验证码图片

from selenium import webdriver
import time
from PIL import Image

# Script: open the login page in Chrome, take a full-window screenshot, crop
# the captcha area out of it and pass the cropped image to ocr.getContent().
url = "http://passport.molbase.cn/zh/auth/login?type=login&return_url=http://www.molbase.cn/#402d19e50fff44c827a4f3b608bd5812"
chrome_driver_path=r'C:\Users\24475\PycharmProjects\untitled2\chromedriver.exe'
driver = webdriver.Chrome(chrome_driver_path)
try:
    driver.maximize_window()

    time.sleep(2)

    driver.get(url)

    # Fixed pause after navigation before the captcha element is looked up.
    time.sleep(5)
    ca = driver.find_element_by_id('cpa_code')
    ca.click()
    time.sleep(2)
    # The element location is looked up twice (by id and by XPath) and only
    # printed; it is NOT used for cropping, the crop box below is hard-coded.
    captcha_frame_abs_xy = driver.find_element_by_id('cpa_code').location
    d = driver.find_element_by_xpath('//*[@id="cpa_code"]').location
    print(d)

    print("======")
    print(captcha_frame_abs_xy)

    # Page screenshot first, then the captcha is cut out of that screenshot.
    driver.get_screenshot_as_file(r'C:\Users\24475\PycharmProjects\untitled2\temp\1.png')
    # Open the screenshot in a ``with`` block so the file handle is released
    # once the cropped region has been written to disk.
    with Image.open(r'C:\Users\24475\PycharmProjects\untitled2\temp\1.png') as im:
        # Crop box as (left, upper, right, lower): the top-left and
        # bottom-right corners of the captcha rectangle inside the screenshot.
        # The values were measured by hand with an image editor.
        box = (1502, 401, 1651, 456)
        print(box)
        print("pass")
        region = im.crop(box)  # region is a new image object
        region.save(r"C:\Users\24475\PycharmProjects\untitled2\temp2\2.png")
    time.sleep(5)
finally:
    # quit() ends the whole WebDriver session; running it in ``finally``
    # releases the browser even when one of the steps above raises.
    driver.quit()
import  ocr
ocr.getContent(r'C:\Users\24475\PycharmProjects\untitled2\temp2\2.png')

使用百度 AIP 识别验证码。这里导入的 aip 模块需要先安装 baidu-aip(pip install baidu-aip)。下面的代码应为上文脚本中 import ocr 所引用的 ocr 模块(可保存为 ocr.py)。

from aip import AipOcr
import re

""" 读取密码 """

#path="my_password.txt"
def getPassword():
    """Return the OCR credentials as an ``(APP_ID, API_KEY, SECRET_KEY)`` tuple.

    All three values are empty-string placeholders in this file; replace
    them with real credentials before use.
    """
    app_id, api_key, secret_key = '', '', ''
    return app_id, api_key, secret_key

""" 读取图片 """

def get_file_content(file_path):
    """Read the file at ``file_path`` in binary mode and return its bytes."""
    with open(file_path, mode='rb') as image_file:
        raw_bytes = image_file.read()
    return raw_bytes



""" 识别图片内容 """
def getContent(file_path=r"C:\Users\24475\PycharmProjects\untitled2\1.png"):
    """Recognize the text in a local image and return the first result line.

    Args:
        file_path: Path of the image file to send to the OCR service.

    Returns:
        The ``words`` value of the first entry in the response's
        ``words_result`` list, or an empty string when that list is empty
        (nothing was recognized).

    Raises:
        KeyError: If the response contains no ``words_result`` key. The
            message includes the response's ``error_code`` / ``error_msg``
            values when present.
    """
    APP_ID, API_KEY, SECRET_KEY = getPassword()
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    image = get_file_content(file_path)
    # Call general text recognition with the bytes of the local image.
    res = client.basicAccurate(image)
    print(type(res))
    print(res)

    if 'words_result' not in res:
        # NOTE(review): failed calls are expected to return a dict carrying
        # error_code / error_msg instead of words_result - confirm against
        # the Baidu OCR documentation. Same exception type as before, but
        # with a message that explains what went wrong.
        raise KeyError(
            "OCR response has no 'words_result' (error_code=%r, error_msg=%r)"
            % (res.get('error_code'), res.get('error_msg'))
        )

    words_result = res['words_result']
    if not words_result:
        # Nothing recognized: return an empty string rather than failing
        # with IndexError on the [0] lookup.
        return ''
    return words_result[0]['words']

def get_str(res):
    """Strip everything except letters and digits from ``res``.

    Removes every non-word character and every underscore, prints the
    cleaned string, and returns it. With a ``str`` pattern ``\\W`` is
    Unicode-aware, so CJK characters are kept.

    Args:
        res: The text to clean.

    Returns:
        ``res`` with all non-word characters and underscores removed.
    """
    # Raw string: '\W' in a normal string literal is an invalid escape
    # sequence. The character class also drops '_' in the same pass.
    result = re.sub(r'[\W_]+', '', res)
    print(result)
    return result

if __name__ == "__main__":
    # Manual check: recognize the default image, then print the cleaned text.
    # To exercise only the cleaning step, call get_str() directly with a
    # sample string mixing letters, CJK characters and punctuation.
    get_str(getContent())