34 lines
862 B
Python
34 lines
862 B
Python
from PIL import Image
|
|
import pytesseract
|
|
import sys
|
|
|
|
"""OCR for captcha images using Tesseract only."""
|
|
|
|
# Tesseract options passed to pytesseract for every recognition call.
#   --psm 7                    page segmentation mode 7 (treat the image as a
#                              single line of text)
#   tessedit_char_whitelist    restrict output to lowercase letters and digits
# The digit list previously read "123456890", which left out "7" and made it
# impossible for Tesseract to ever report that digit.
custom_config = r'--psm 7 -c tessedit_char_whitelist="abcdefghijklmnopqrstuvwxyz1234567890"'
|
|
|
|
def recognize_captcha(img_path, threshold=150):
    """Binarize a captcha image and run Tesseract OCR on it.

    Args:
        img_path: Image location, passed unchanged to ``PIL.Image.open``.
        threshold: Grayscale cut-off in the range 0-255. Pixels darker than
            this value map to 0 and all others map to 1. Defaults to 150.

    Returns:
        The string returned by ``pytesseract.image_to_string``, unmodified.
        It may span several lines; callers pick the part they need.
    """
    # Open inside a context manager so the underlying file handle is released
    # as soon as the grayscale copy has been produced.
    with Image.open(img_path) as source:
        gray = source.convert("L")

    # 1. Threshold the image: one lookup entry per possible 8-bit gray level.
    table = [0 if level < threshold else 1 for level in range(256)]
    binary = gray.point(table, '1')

    # 2. Recognize with Tesseract using the module-level configuration.
    return pytesseract.image_to_string(binary, config=custom_config)
|
|
|
|
|
|
if __name__ == '__main__':
    # Exactly one command-line argument is expected: the captcha image path.
    if len(sys.argv) != 2:
        print("Usage: python recognize.py <image_filename>")
        # Stop here: falling through would raise IndexError on sys.argv[1]
        # when no argument was given, or silently ignore extra arguments.
        sys.exit(1)

    res = recognize_captcha(sys.argv[1])

    # Print only the first line of the OCR output. str.split always returns
    # at least one element, so no length check is required.
    print(res.split("\n")[0])