This post mainly introduces logistic regression: it first covers the relevant basic concepts and principles, then implements binary classification with logistic regression in Python. Note that most of the theory comes from Li Hang's Statistical Learning Methods (《统计学习方法》), translated notes of the Stanford course, and the Coursera machine learning course.
The theory in this post comes from Andrew Ng's Coursera machine learning course. He explains everything thoroughly yet accessibly, so I won't repeat it all; I will just summarize briefly and note a few observations of my own.
1. Logistic regression hypothesis function
Logistic regression is mostly used for classification problems, despite the name "regression". Linear regression, in contrast, is generally not recommended for classification, because its output y can fall outside the [0, 1] range. This is exactly why the logistic regression hypothesis contains the sigmoid function.
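For reference, the hypothesis as defined in the course is the sigmoid applied to a linear combination of the inputs:

$$h_\theta(x) = g(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}$$

so the output always lies in (0, 1) and can be read as the estimated probability that y = 1 given x.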

2. Cost function
Logistic regression does not use the "least mean squares" error here: because h(x) contains the nonlinear sigmoid, a squared-error cost J would no longer be a smooth convex "bowl", and gradient descent could easily get stuck in a local optimum. So the following cost function is used instead:
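This is the cross-entropy cost from the course, the same quantity that compute_loss evaluates in the code below:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta\left(x^{(i)}\right) + \left(1-y^{(i)}\right)\log\left(1-h_\theta\left(x^{(i)}\right)\right)\right]$$

Unlike the squared-error cost, this J is convex in \(\theta\), so gradient descent converges to the global optimum.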

3. Parameter learning (gradient descent)
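Using this cost, the batch gradient descent update from the course takes the form

$$\theta_j := \theta_j - \alpha\,\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta\left(x^{(i)}\right) - y^{(i)}\right)x_j^{(i)}$$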

You will find that this is identical in form to the gradient descent update for linear regression; but keep in mind that the h(x) inside it is not the same!
4. Python code implementation
First, a look at the dataset. It is the iris flower dataset; we take only two of the features in X (x1, x2), and only the two classes 0 and 1 from y.
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import numpy as np

# load the iris dataset
iris = load_iris()
data = iris.data
target = iris.target
# print(data[:10])
# print(target[10:])

# keep the first 100 samples (classes 0 and 1) and two features (x1, x2)
X = data[0:100, [0, 2]]
y = target[0:100]
print(X[:5])
print(y[-5:])

# scatter plot of the two classes
label = np.array(y)
index_0 = np.where(label == 0)
plt.scatter(X[index_0, 0], X[index_0, 1], marker='x', color='b', label='0', s=15)
index_1 = np.where(label == 1)
plt.scatter(X[index_1, 0], X[index_1, 1], marker='o', color='r', label='1', s=15)
plt.xlabel('X1')
plt.ylabel('X2')
plt.legend(loc='upper left')
plt.show()

Next, wrap the logistic regression model in a class:
import numpy as np

class logistic(object):
    def __init__(self):
        self.W = None

    def train(self, X, y, learn_rate=0.01, num_iters=5000):
        num_train, num_feature = X.shape
        # initialize the weights with small random values
        self.W = 0.001 * np.random.randn(num_feature, 1)
        loss = []
        for i in range(num_iters):
            error, dW = self.compute_loss(X, y)
            # gradient descent update
            self.W -= learn_rate * dW
            loss.append(error)
            if i % 200 == 0:
                print('i=%d, error=%f' % (i, error))
        return loss

    def compute_loss(self, X, y):
        # cross-entropy loss and its gradient
        num_train = X.shape[0]
        h = self.output(X)
        loss = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
        loss = loss / num_train
        dW = X.T.dot(h - y) / num_train
        return loss, dW

    def output(self, X):
        g = np.dot(X, self.W)
        return self.sigmoid(g)

    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    def predict(self, X_test):
        # classify as 1 when the predicted probability >= 0.5
        h = self.output(X_test)
        y_pred = np.where(h >= 0.5, 1, 0)
        return y_pred
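A note on the dW line in compute_loss: it is the vectorized form of the gradient of J(\(\theta\)),

$$\nabla_\theta J(\theta) = \frac{1}{m}\,X^T\left(h - y\right)$$

where h is the column vector of sigmoid outputs; this is also why y has to be reshaped into a column vector before training, which the next snippet does.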
Train it and visualize the loss tracked over the iterations:
import matplotlib.pyplot as plt

y = y.reshape((-1, 1))
# add the bias feature x0 = 1
one = np.ones((X.shape[0], 1))
X_train = np.hstack((one, X))
classify = logistic()
loss = classify.train(X_train, y)
print(classify.W)

plt.plot(loss)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()
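The predict method defined above is never exercised in the original snippets; as a quick sanity check (my own addition, evaluated on the training set itself), you can do:

y_pred = classify.predict(X_train)
# fraction of training samples classified correctly
print('training accuracy: %f' % np.mean(y_pred == y))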

Finally, visualize the decision boundary:
# y was reshaped to a column vector above, so flatten it back for indexing
label = np.array(y).ravel()
index_0 = np.where(label == 0)
plt.scatter(X[index_0, 0], X[index_0, 1], marker='x', color='b', label='0', s=15)
index_1 = np.where(label == 1)
plt.scatter(X[index_1, 0], X[index_1, 1], marker='o', color='r', label='1', s=15)
# show the decision boundary: the line where W0 + W1*x1 + W2*x2 = 0
x1 = np.arange(4, 7.5, 0.5)
x2 = (-classify.W[0] - classify.W[1] * x1) / classify.W[2]
plt.plot(x1, x2, color='black')
plt.xlabel('X1')
plt.ylabel('X2')
plt.legend(loc='upper left')
plt.show()
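Why that expression for x2: predict outputs 1 exactly when h(x) >= 0.5, i.e. when \(\theta^T x \ge 0\), so the boundary is the line

$$W_0 + W_1 x_1 + W_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = \frac{-W_0 - W_1 x_1}{W_2}$$

which is exactly what the black line in the plot shows.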

PS: As you can see, the learned decision boundary (classification boundary) successfully separates the two classes. Of course, classification also includes multi-class problems (one-vs-all), as well as nonlinear classification problems; for details, see the shared materials. A sketch of the one-vs-all idea follows below.
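To make the one-vs-all remark concrete, here is a minimal one-vs-rest sketch built on the logistic class above. It is my own illustration rather than part of the referenced materials, and it assumes the full three-class iris data (reusing data and target from the first snippet) with the same two features:

# one-vs-rest: train one binary classifier per class
X_all = np.hstack((np.ones((data.shape[0], 1)), data[:, [0, 2]]))
classifiers = []
for c in range(3):
    # relabel: 1 for class c, 0 for every other class
    y_c = (target == c).astype(int).reshape((-1, 1))
    clf = logistic()
    clf.train(X_all, y_c)
    classifiers.append(clf)
# pick the class whose classifier reports the highest probability
probs = np.hstack([clf.output(X_all) for clf in classifiers])
y_pred = np.argmax(probs, axis=1)
print('one-vs-rest training accuracy: %f' % np.mean(y_pred == target))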