您可以捐助，支持我们的公益事业。

1元 10元 50元





认证码：  验证码,看不清楚?请点击刷新验证码 必填



  求知 文章 文库 Lib 视频 iPerson 课程 认证 咨询 工具 讲座 Model Center   Code  
会员   
   
 
     
   
 订阅
  捐助
KNN算法实战：验证码的识别
 
  2283  次浏览      28
 2019-2-25
   
 
编辑推荐:

本文来自于biaodianfu，文章主要介绍了二值化、字符切割、字符识别等，使用KNN进行验证码的识别的相关内容。

识别验证码的方式很多，如tesseract、SVM等。今天主要学习的是如何使用KNN进行验证码的识别。

数据准备

本次实验采用的是CSDN的验证码做演练，相关的接口：
https://download.csdn.net/index.php/rest/tools/
validcode/source_ip_validate/10.5711163911089325

目前接口返回的验证码共2种：

纯数字、干扰小的验证码，简单进行图片去除背景、二值化和阈值处理后，使用kNN算法即可识别。

字母加数字、背景有干扰、图形字符位置有轻微变形，进行图片去除背景、二值化和阈值处理后，使用kNN算法识别。

这里选择第二种进行破解。由于两种验证码的图片大小不一样，所以可以使用图片大小来判断哪个是第一种验证码，哪个是第二种验证码。

下载验证码

import requests
import uuid
from PIL import Image
import os
# Endpoint that returns a captcha image on every request.  The literal is
# written with implicit string concatenation so that it remains one valid
# string (the original literal was broken across two physical lines).
url = ("http://download.csdn.net/index.php/rest"
       "/tools/validcode/source_ip_validate/10.5711163911089325")

# Make sure the output directory exists before the first file is written.
os.makedirs("./captchas", exist_ok=True)

for i in range(1000):
    resp = requests.get(url)
    filename = "./captchas/" + str(uuid.uuid4()) + ".png"
    # Leaving the with-block flushes and closes the file, so no explicit
    # flush()/close() calls are needed.
    with open(filename, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    # Read the size inside a context manager so the image handle is released
    # in both branches (it used to stay open when the size matched) and
    # before the file is possibly deleted.
    with Image.open(filename) as im:
        size = im.size
    # Only images of exactly 70x25 pixels are kept; others are deleted.
    if size != (70, 25):
        os.remove(filename)
    else:
        print(filename)

分割字符

下载过后，就需要对字母进行分割。分割字符还是一件比较麻烦的工作。

灰度化

将彩色的图片转化为灰度图片，便于后面的二值化处理，示例代码：

from PIL import Image

# Path of one previously downloaded captcha image.
file = ".\\captchas\\0a4a22cd-f16b-4ae4-bc52-cdf4c081301d.png"
im = Image.open(file)
# Convert the picture to mode 'L' (the grayscale step described in the text).
im_gray = im.convert('L')
# Display the converted image for visual inspection.
im_gray.show()

处理前：

处理后：

二值化

灰度化以后，有颜色的像素点为0-255之间的值。二值化就是将大于某个值的像素点都修改为255，小于该值的修改为0，示例代码：

from PIL import Image
import numpy as np
# Path of one previously downloaded captcha image.
file = ".\\captchas\\0a4a22cd-f16b-4ae4-bc52-cdf4c081301d.png"
im = Image.open(file)
im_gray = im.convert('L')

# Grayscale pixel matrix, indexed as pix[row, column].
pix = np.array(im_gray)
print(pix.shape)
print(pix)

threshold = 100  # binarization threshold

# Pixels brighter than the threshold become 255, all others 0.
# bool * 255 produces a default-integer array (int64 on most platforms),
# which Image.fromarray cannot turn into an image, so cast to uint8.
pix = ((pix > threshold) * 255).astype(np.uint8)
print(pix)

out = Image.fromarray(pix)
out.show()

二值化输出的结果：

去除边框

从二值化输出的结果可以看到除了字符，还存在边框，在切割字符前还需要先将边框去除。

# Width, in pixels, of the frame to remove from every side.
border_width = 1
# Drop border_width rows/columns at each edge of the binarized array
# (the closing bracket of the subscript was missing in the original line).
new_pix = pix[border_width:-border_width, border_width:-border_width]

字符切割

由于字符与字符间没有存在连接，可以使用比较简单的“投影法”进行字符的切割。原理就是将二值化后的图片先在垂直方向进行投影，根据投影后的极值来判断分割边界。分割后的小图片再在水平方向进行投影。

代码实现：

def vertical_image(image):
    """Display the vertical projection histogram of a binarized image.

    For every column the number of pixels equal to 255 is counted and drawn
    as a vertical bar, starting at the top edge, on a black canvas with the
    same shape as *image*.  The canvas is shown in a window named
    ``vert_image`` until a key is pressed.

    Args:
        image: 2-D array holding the binarized picture.
    """
    _, width = image.shape
    # Per-column count of pixels whose value is 255, computed in one pass.
    h = (image == 255).sum(axis=0)
    new_image = np.zeros(image.shape, np.uint8)
    for x in range(width):
        count = int(h[x])
        # cv2.line includes both end points, so a bar of `count` pixels ends
        # at row count - 1; columns without any hit are left untouched
        # (drawing to row h[x] made every bar one pixel too long and painted
        # a pixel even for empty columns).
        if count > 0:
            cv2.line(new_image, (x, 0), (x, count - 1), 255, 1)
    cv2.imshow('vert_image', new_image)
    cv2.waitKey()
    cv2.destroyAllWindows()

整体代码

from PIL import Image
import cv2
import numpy as np
import os
import uuid


def clean_bg(filename, threshold=100, border_width=1):
    """Load an image and return it binarized with the outer frame removed.

    NOTE: ``clean_bg`` is defined a second time further down in this file
    (there with a 2-pixel border); that later definition replaces this one
    when the module is loaded.

    Args:
        filename: Path of the image file to load.
        threshold: Gray levels strictly greater than this become 255, all
            others become 0.
        border_width: Number of pixels cut away from each of the four sides;
            0 keeps the full image.

    Returns:
        A 2-D ``uint8`` array containing only the values 0 and 255.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the array.
    with Image.open(filename) as im:
        image = np.array(im.convert('L'))
    # bool * 255 yields a default-integer array (int64 on most platforms);
    # cast to uint8 so the result can be written with cv2.imwrite.
    pix = ((image > threshold) * 255).astype(np.uint8)
    if border_width:
        pix = pix[border_width:-border_width, border_width:-border_width]
    return pix


def get_col_rect(image):
    """Find the horizontal extent of every character in a binarized image.

    A column belongs to a character when it contains at least one black
    (value 0) pixel.  Each maximal run of such columns is reported as a
    ``(start, end)`` pair with ``end`` exclusive, so it can be used directly
    as the slice ``image[:, start:end]``.

    Args:
        image: 2-D array with black (0) glyphs on a non-zero background.

    Returns:
        List of ``(start, end)`` column ranges, ordered left to right.
    """
    _, width = image.shape
    # Minimum number of black pixels for a column to count as "inked".
    blank_distance = 1
    # Number of black pixels in every column.
    h = (image == 0).sum(axis=0)

    col_rect = []
    start_line = None  # first column of the run currently being scanned
    for i in range(width):
        inked = h[i] >= blank_distance
        if start_line is None:
            if inked:
                start_line = i
        elif not inked:
            col_rect.append((start_line, i))
            start_line = None
    # A run that reaches the right edge is closed here; previously such a
    # character was silently dropped because no blank column followed it.
    if start_line is not None:
        col_rect.append((start_line, width))
    return col_rect


def get_row_rect(image):
    """Find the vertical extent of the first block of inked rows.

    A row is inked when it contains at least one black (value 0) pixel.
    Scanning top to bottom, the first maximal run of inked rows is returned
    as ``(start, end)`` with ``end`` exclusive, i.e. usable as the slice
    ``image[start:end, :]``.

    Args:
        image: 2-D array with black (0) glyphs on a non-zero background.

    Returns:
        ``(start, end)`` of the first inked run, or ``(0, 0)`` when the image
        contains no black pixel at all.
    """
    height, _ = image.shape
    # Minimum number of black pixels for a row to count as "inked".
    blank_distance = 1
    # Number of black pixels in every row.
    h = (image == 0).sum(axis=1)

    start_line = None  # first row of the run, once one has been found
    for i in range(height):
        inked = h[i] >= blank_distance
        if start_line is None:
            if inked:
                start_line = i
        elif not inked:
            # First blank row after the run: it is the exclusive end.
            return (start_line, i)
    if start_line is not None:
        # The run reaches the bottom edge.  The exclusive end is `height`;
        # the old code returned height - 1 (cutting off the last inked row)
        # and reported "nothing found" when the run began on the last row.
        return (start_line, height)
    return (0, 0)


def get_block_image(image, col_rect):
    """Crop a single character out of *image*.

    Args:
        image: 2-D binarized image array.
        col_rect: Sequence whose first two items are the start and
            (exclusive) end column of the character.

    Returns:
        The sub-array covering the character, or ``None`` when
        ``get_row_rect`` reports an end row of 0 for that column strip.
    """
    left, right = col_rect[0], col_rect[1]
    # Look only at the character's own columns to find its vertical extent.
    row_rect = get_row_rect(image[0:image.shape[0], left:right])
    # An end row of 0 is the "nothing found" marker.
    if row_rect[1] == 0:
        return None
    return image[row_rect[0]:row_rect[1], left:right]


def clean_bg(filename, threshold=100, border_width=2):
    """Load an image and return it binarized with the outer frame removed.

    This is the second definition of ``clean_bg`` in this file; it replaces
    the earlier one (which used a 1-pixel border) when the module is loaded.

    Args:
        filename: Path of the image file to load.
        threshold: Gray levels strictly greater than this become 255, all
            others become 0.
        border_width: Number of pixels cut away from each of the four sides;
            0 keeps the full image.

    Returns:
        A 2-D ``uint8`` array containing only the values 0 and 255.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the array.
    with Image.open(filename) as im:
        image = np.array(im.convert('L'))
    # bool * 255 yields a default-integer array (int64 on most platforms);
    # cast to uint8 so the result can be written with cv2.imwrite.
    pix = ((image > threshold) * 255).astype(np.uint8)
    if border_width:
        pix = pix[border_width:-border_width, border_width:-border_width]
    return pix

def split(filename):
    """Cut one captcha image into single-character image files.

    The image is binarized with ``clean_bg``, divided into column ranges by
    ``get_col_rect`` and every block returned by ``get_block_image`` is
    written as a PNG with a random UUID name below ``letters/``.

    Args:
        filename: Path of the captcha image to process.
    """
    image = clean_bg(filename)
    for cols in get_col_rect(image):
        block_image = get_block_image(image, cols)
        # Column ranges for which no block could be cropped are skipped.
        if block_image is None:
            continue
        new_image_filename = 'letters/' + str(uuid.uuid4()) + '.png'
        cv2.imwrite(new_image_filename, block_image)


if __name__ == '__main__':
    # Split every file found in the captchas directory and report progress.
    captcha_paths = ['captchas/' + name for name in os.listdir('captchas')]
    for current_file in captcha_paths:
        split(current_file)
        print('split file:%s' % current_file)

数据集准备

在完成图像切割后，需要将切分的字母建立有标签的样本。即将切分后的字符梳理到正确的分类中。比较常见的方式是人工梳理。

由于图像比较多，这里使用Tesseract-OCR进行识别。

官方项目地址：https://github.com/tesseract-ocr/tesseract

Windows安装包地址：https://github.com/UB-Mannheim/tesseract/wiki

Tesseract-OCR的安装

下载完安装包后，直接运行安装即可，比较重要的是环境变量的设置。

将安装目录（D:\Program Files (x86)\Tesseract-OCR）添加进PATH

新建TESSDATA_PREFIX系统变量，值为tessdata 文件夹的路径（D:\Program Files (x86)\Tesseract-OCR\tessdata）

安装Python包pytesseract（pip install pytesseract）

Tesseract-OCR的使用

使用起来非常的简单，代码如下：

from PIL import Image
import pytesseract
import os


def copy_to_dir(filename):
    """OCR a single-character image and store a copy under ``dataset/<text>/``.

    The recognized text (with surrounding whitespace removed) is used as the
    name of the class directory; the copy keeps the base name of *filename*.
    When nothing is recognized the copy lands directly in ``dataset/``.

    Args:
        filename: Path of the character image, e.g. ``letters/<uuid>.png``.
    """
    # NOTE(review): "-l osd" selects the orientation/script-detection data
    # rather than a recognition language; "-l eng" was probably intended.
    # Left unchanged here -- confirm before switching.
    config = ("-c tessedit"
              "_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
              " --psm 10"
              " -l osd"
              " ")
    # The context manager closes the image even if OCR or saving fails.
    with Image.open(filename) as image:
        # Strip whitespace so that newline / form-feed characters that may
        # trail the OCR output never become part of a directory name.
        code = pytesseract.image_to_string(image, config=config).strip()
        target_dir = "dataset/" + code
        # makedirs also creates "dataset" itself and tolerates an existing
        # directory, unlike the exists()/mkdir() pair it replaces.
        os.makedirs(target_dir, exist_ok=True)
        # basename() works for any source directory; the previous
        # replace("letters", "") only did for paths starting with "letters".
        image.save(target_dir + "/" + os.path.basename(filename))


if __name__ == "__main__":
    # Run OCR-based sorting on every file in the letters directory.
    letter_paths = ['letters/' + name for name in os.listdir('letters')]
    for current_file in letter_paths:
        copy_to_dir(current_file)
        print(current_file)

由于Tesseract-OCR识别的准确率非常的低，完全不能使用，放弃~，还是需要手工整理。

图片尺寸统一

在完成人工处理后，发现切割后的图片大小不一。在字符识别前需要对图片的尺寸进行统一。

具体实现方法：

import cv2

def image_resize(filename, width=6, height=10):
    """Resize the image stored at *filename*, in place, to a fixed size.

    The file is read as a single-channel image and printed.  When its shape
    differs from ``(height, width)`` it is resampled with bicubic
    interpolation, printed again and written back to the same path.

    Args:
        filename: Path of the image file; overwritten when a resize happens.
        width: Target width in pixels.
        height: Target height in pixels.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)  # read as single channel
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError("cannot read image: %s" % filename)
    print(img)
    # NOTE(review): the original indentation was lost; the second print and
    # the write are assumed to belong to the resize branch -- confirm.
    if img.shape[0] != height or img.shape[1] != width:
        # cv2.resize expects the target size as (width, height).
        img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
        print(img)
        cv2.imwrite(filename, img)

使用cv2.resize时，参数输入是 宽×高×通道，这里使用的是单通道的，interpolation的选项有：

INTER_NEAREST 最近邻插值

INTER_LINEAR 双线性插值（默认设置）

INTER_AREA 使用像素区域关系进行重采样。 它可能是图像抽取的首选方法，因为它会产生无云纹理的结果。 但是当图像缩放时，它类似于INTER_NEAREST方法。

INTER_CUBIC 4×4像素邻域的双三次插值

INTER_LANCZOS4 8×8像素邻域的Lanczos插值

另外为了让数据更加便于利用，可以将图片再进行二值化的归一。具体代码如下：

import cv2
import numpy as np

def image_normalize(filename):
    """Resize an image to 6x10 pixels and rescale its values to 0..1, in place.

    The file is read as a single-channel image, resampled to 6 pixels wide by
    10 pixels high when necessary, min-max normalized to the range [0, 1]
    and written back to the same path.

    Args:
        filename: Path of the image file; it is overwritten.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)  # read as single channel
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError("cannot read image: %s" % filename)
    if img.shape[0] != 10 or img.shape[1] != 6:
        # cv2.resize expects the target size as (width, height).
        img = cv2.resize(img, (6, 10), interpolation=cv2.INTER_CUBIC)
    # Normalization: pass None so OpenCV allocates the destination itself.
    # The buffer created before (np.zeros((6, 10))) had rows and columns
    # swapped and a float dtype, so it did not match the image.
    normalized_img = cv2.normalize(img, None, 0, 1, cv2.NORM_MINMAX)
    cv2.imwrite(filename, normalized_img)

归一化的类型，可以有以下的取值：

NORM_MINMAX:数组的数值被平移或缩放到一个指定的范围，线性归一化，一般较常用。

NORM_INF:此类型的定义没有查到，根据OpenCV 1的对应项，可能是归一化数组的C-范数(绝对值的最大值)

NORM_L1 : 归一化数组的L1-范数(绝对值的和)

NORM_L2: 归一化数组的(欧几里德)L2-范数

字符识别

字符图片 宽6个像素，高10个像素 ，理论上可以最简单粗暴地定义出60个特征：60个像素点上面的像素值。但是显然这样高维度必然会造成过大的计算量，可以适当的降维。比如：

每行上黑色像素的个数，可以得到10个特征

每列上黑色像素的个数，可以得到6个特征

from sklearn.neighbors import KNeighborsClassifier
import os
from sklearn import preprocessing
import cv2
import numpy as np
import warnings
warnings.filterwarnings(module='sklearn*', action='ignore', category=DeprecationWarning)


def get_feature(file_name):
    """Build the projection feature vector of a character image.

    The image is read as a single-channel picture.  The result lists the
    number of black (value 0) pixels of every row, top to bottom, followed
    by the number of black pixels of every column, left to right.

    Args:
        file_name: Path of the character image.

    Returns:
        List of ``height + width`` integers.
    """
    img = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)  # read as single channel
    height, width = img.shape

    # Boolean mask of the black pixels.
    is_black = img == 0
    row_counts = [int(np.count_nonzero(is_black[y, :])) for y in range(height)]
    col_counts = [int(np.count_nonzero(is_black[:, x])) for x in range(width)]
    return row_counts + col_counts


if __name__ == "__main__":
    # Feature vector of the sample that will be classified.
    test = get_feature("dataset/K/04a0844c-12f2-4344-9b78-ac1d28d746c0.png")

    # Every sub-directory of dataset/ is one class; its name is the label.
    category = []
    features = []
    for dir_name in os.listdir('dataset'):
        class_dir = 'dataset/' + dir_name
        for filename in os.listdir(class_dir):
            category.append(dir_name)
            features.append(get_feature(class_dir + '/' + filename))

    # Map the directory names to integer labels.
    le = preprocessing.LabelEncoder()
    label = le.fit_transform(category)

    # Nearest-neighbour classifier with k = 1.
    model = KNeighborsClassifier(n_neighbors=1)
    model.fit(features, label)
    predicted = model.predict(np.array(test).reshape(1, -1))
    print(predicted)
    print(le.inverse_transform(predicted))

这里直接使用了sklearn中的KNN方法

 
   
2283 次浏览       28
相关文章

基于图卷积网络的图深度学习
自动驾驶中的3D目标检测
工业机器人控制系统架构介绍
项目实战：如何构建知识图谱
 
相关文档

5GÈ˹¤ÖÇÄÜÎïÁªÍøµÄµäÐÍÓ¦ÓÃ
Éî¶ÈѧϰÔÚ×Ô¶¯¼ÝÊ»ÖеÄÓ¦ÓÃ
ͼÉñ¾­ÍøÂçÔÚ½»²æÑ§¿ÆÁìÓòµÄÓ¦ÓÃÑо¿
ÎÞÈË»úϵͳԭÀí
相关课程

人工智能、机器学习&TensorFlow
机器人软件开发技术
人工智能，机器学习和深度学习
图像处理算法方法与实践