Python 《Python识别登录验证码》实验报告

WechatIMG16.jpeg

仓库地址

python-captcha

环境

  • Python 3.5.2

  • Pillow 7.2.0

Pillow 是一个 Python 图像处理库。

安装Pillow

  • 安装
1
pip3 install pillow
  • 查看版本
1
2
$ python3 -m pip freeze | grep Pillow
Pillow==7.2.0

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#  -*- coding:utf8 -*-
from PIL import Image
import hashlib
import time
import os
import math
import string

class VectorCompare:
    '''
    Cosine-similarity helper for sparse vectors stored as dicts.

    A vector ("concordance") is a mapping from a dimension key to a numeric
    component. A key that is absent from a mapping contributes nothing to
    the dot product computed in relation().
    '''

    def magnitude(self, concordance):
        '''
        Return the Euclidean length of a vector.

        concordance: mapping of dimension key -> numeric component.
        Returns 0.0 for an empty mapping.
        '''
        return math.sqrt(sum(count ** 2 for count in concordance.values()))

    def relation(self, concordance1, concordance2):
        '''
        Return the cosine of the angle between two vectors.

        concordance1, concordance2: mappings of dimension key -> numeric
        component. Only keys present in both mappings contribute to the
        dot product.

        Returns 0.0 when either vector has zero magnitude (for example an
        all-zero or empty vector), because the cosine is undefined there
        and dividing would raise ZeroDivisionError.
        '''
        topvalue = 0
        for word, count in concordance1.items():
            if word in concordance2:
                topvalue += count * concordance2[word]

        denominator = self.magnitude(concordance1) * self.magnitude(concordance2)
        if denominator == 0:
            return 0.0
        return topvalue / denominator

def buildvector(image):
    '''
    Flatten an image into a dict-based vector.

    image: any object whose getdata() returns an iterable of pixel values
    (the script passes PIL images).

    Returns a dict mapping the 0-based position of each pixel in
    getdata() order to that pixel's value.
    '''
    return dict(enumerate(image.getdata()))

def letterIconset():
    '''
    Load the training samples for every character the recogniser knows.

    Each character is looked up in its own sub-directory of the training
    directory, and every ".gif" file found there becomes one sample.

    Returns a list of single-entry dicts of the form {character: [vector]},
    one per sample image, where vector is the result of buildvector().
    Files that are not ".gif" are skipped and produce no entry.

    Raises OSError (from os.listdir) if a character's sub-directory does
    not exist.
    '''
    # Characters to train on: the ten digits followed by a-z, each once.
    # A duplicated entry would load the same samples twice.
    iconset = list(string.digits + string.ascii_lowercase)

    # Root directory of the training set.
    letterPath = 'iconset'

    captchaList = []
    for letter in iconset:
        # Join with a path separator; plain concatenation would look for
        # "iconset0/" instead of "iconset/0/".
        letterDir = os.path.join(letterPath, letter)
        for img in os.listdir(letterDir):
            if not img.endswith(".gif"):  # ignore non-gif files
                continue
            # The context manager closes the file once the pixels are read.
            with Image.open(os.path.join(letterDir, img)) as sample:
                captchaList.append({letter: [buildvector(sample)]})
    return captchaList

def pixelCollection(blackWhiteCaptcha):
    '''
    Find the horizontal extent of each character in a binarised image.

    The image is scanned column by column. A column counts as part of a
    character when at least one of its pixels differs from 255; a run of
    such columns is reported as one character.

    blackWhiteCaptcha: object exposing size as (width, height) and
    getpixel((x, y)), such as the image returned by blackWhite().

    Returns a list of (start, end) column pairs in left-to-right order.
    start is the first column of the run and end is the first column
    after it, so the pair can be used directly as the left/right bounds
    of a crop box.
    '''
    width, height = blackWhiteCaptcha.size

    letters = []
    start = None  # first column of the run being scanned, or None
    for x in range(width):
        hasInk = any(
            blackWhiteCaptcha.getpixel((x, y)) != 255 for y in range(height)
        )
        if hasInk and start is None:
            start = x
        elif not hasInk and start is not None:
            letters.append((start, x))
            start = None

    # A character touching the right edge has no blank column after it,
    # so close it here instead of silently dropping it.
    if start is not None:
        letters.append((start, width))
    return letters

def blackWhite(captcha, targetColors=(220, 227)):
    '''
    Build a two-level (black on white) copy of a captcha image.

    captcha: PIL image to binarise.
    targetColors: pixel values, read after conversion to mode "P", that
    are treated as character pixels. The defaults are the two values the
    original script hard-coded.
    NOTE(review): these values presumably match the sample captcha's
    palette; confirm them for other captcha sources.

    Returns a new mode "P" image of the same size in which every pixel
    whose value is in targetColors is 0 and every other pixel is 255.
    '''
    # convert() returns a new image and leaves its receiver untouched, so
    # the result has to be kept for the conversion to take effect.
    captcha = captcha.convert("P")

    blackWhiteCaptcha = Image.new("P", captcha.size, 255)
    width, height = captcha.size
    for y in range(height):
        for x in range(width):
            if captcha.getpixel((x, y)) in targetColors:
                blackWhiteCaptcha.putpixel((x, y), 0)
    return blackWhiteCaptcha

def identifyCaptcha(captcha):
    '''
    Recognise the characters in a captcha image.

    The image is binarised with blackWhite(), split into per-character
    column ranges with pixelCollection(), and each character crop is
    compared against every training sample from letterIconset() using
    the cosine similarity from VectorCompare. The best-scoring training
    character wins.

    captcha: PIL image of the captcha.

    Returns a tuple (count, text): the number of characters found and the
    recognised string.

    Raises IndexError if characters were found but there are no training
    samples to compare them with.
    '''
    blackWhiteCaptcha = blackWhite(captcha)
    letters = pixelCollection(blackWhiteCaptcha)
    captchaList = letterIconset()
    v = VectorCompare()

    height = blackWhiteCaptcha.size[1]
    guessed = []
    for start, end in letters:
        # Crop the full-height strip holding one character and vectorise
        # it once, not once per training sample.
        image = blackWhiteCaptcha.crop((start, 0, end, height))
        imageVector = buildvector(image)

        guess = []
        for sample in captchaList:
            for char, vectors in sample.items():
                for vector in vectors:
                    guess.append((v.relation(vector, imageVector), char))

        if not guess:
            raise IndexError("no training samples available to compare against")

        # Highest similarity wins; ties fall back to the larger character,
        # the same order a descending sort of the tuples would give.
        guessed.append(max(guess)[1])

    return len(letters), ''.join(guessed)

if __name__ == "__main__":
    # Run the recogniser only when executed as a script, so importing this
    # module has no side effects.
    # Open the captcha image; the context manager closes the file afterwards.
    with Image.open("captcha.gif") as captcha:
        # identifyCaptcha returns (character count, recognised text).
        result = identifyCaptcha(captcha)
    print("识别出%d位验证码:%s" % result)

创建 captcha.py,内容如上。

执行

  • 验证码

captcha.gif

准备一张验证码图片,命名为 captcha.gif,放在与 captcha.py 同级目录下。运行前还需准备好 letterIconset() 所读取的字符训练集(每个待识别字符对应的 .gif 样本图片)。

  • 执行
1
2
$ python3 captcha.py
识别出6位验证码:7s9t9j
-------------本文结束感谢您的阅读-------------
0%