python纯Numpy实现线性回归(随机梯度下降法)
用Numpy实现线性回归
一、理论推导
二、代码实现
引入numpy包和random包
import random

import numpy as np
主要函数如下
1.构造数据集
def synthetic_data(w, b, num_examples):
    """Generate a synthetic regression dataset: y = Xw + b + noise.

    The small Gaussian noise (std 0.01) makes the data realistic and lets us
    verify that training recovers the true parameters, e.g. w=[2,-3.4], b=4.2.

    Parameters
    ----------
    w : sequence of float
        True weight vector; its length fixes the feature dimension.
    b : float
        True bias.
    num_examples : int
        Number of samples to generate.

    Returns
    -------
    tuple
        X of shape (num_examples, len(w)) and y of shape (num_examples, 1).
    """
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.matmul(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)  # additive observation noise
    return X, y.reshape((-1, 1))
2.构造迭代器
def data_iter(batch_size, features, labels):
    """Yield (features, labels) minibatches in a random order.

    The example indices are shuffled once per call, so each full pass over
    the data visits every example exactly once in a fresh random order
    (plain minibatch SGD).  The last batch may be smaller than batch_size.

    Note: the original spelled the parameter "bath_size"; all callers pass
    it positionally, so correcting the typo is backward-compatible.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # reshuffle so batches differ between epochs
    for start in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[start:min(start + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
3.定义模型
def Linreg(X, w, b):
    """Linear regression model: return X @ w + b.

    b is stored as a length-1 array elsewhere in this script, so b[0]
    extracts the scalar bias, which broadcasts over every row of X @ w.
    """
    return np.matmul(X, w) + b[0]
4.定义损失函数
def squared_loss(y_hat, y, batch_size):
    """Elementwise squared loss (y_hat - y)^2 / 2, scaled by 1/batch_size.

    Returns an array with the same shape as y; summing it over a batch of
    batch_size examples gives the mean of (y_hat - y)^2 / 2.
    """
    return ((y_hat - y) ** 2 / 2) / batch_size
5.定义优化算法
def step_gradient(b_current, w_current, features, labels, learningRate):
    """Perform one gradient-descent update of (b, w) on a minibatch.

    For the loss (y_hat - y)^2 / 2 averaged over the batch, with
    err = X @ w + b - y, the gradients are:
        dL/db = mean(err)
        dL/dw = X.T @ err / N

    Changes relative to the original:
      * the per-example Python loop is replaced by vectorized NumPy;
      * the deprecated np.matrix is no longer used;
      * the spurious factor 2 on both gradients is removed — squared_loss
        already includes the 1/2, so the factor 2 only doubled the
        effective learning rate versus an autograd implementation.

    Parameters
    ----------
    b_current : ndarray of shape (1,) — current bias.
    w_current : ndarray of shape (d, 1) — current weights.
    features : ndarray of shape (N, d) — minibatch inputs.
    labels : ndarray of shape (N, 1) — minibatch targets.
    learningRate : float — step size.

    Returns
    -------
    list
        [new_b, new_w] with the same shapes as the inputs.
    """
    n = float(len(features))
    err = np.matmul(features, w_current) + b_current - labels  # (N, 1) residuals
    b_gradient = err.mean()
    w_gradient = np.matmul(features.T, err) / n  # (d, 1)
    new_b = b_current - learningRate * b_gradient
    new_w = w_current - learningRate * w_gradient
    return [new_b, new_w]
6.定义梯度更新
def gradient_descent_runner(features, labels, starting_b, starting_w, learning_rate):
    """Apply a single step_gradient update and return [b, w].

    NOTE(review): the original comment mentioned a num_iteration count, but
    no such parameter exists — this function performs exactly one step; the
    training loop calls it once per minibatch.
    """
    return step_gradient(starting_b, starting_w, features, labels, learning_rate)
7.定义训练函数
# Training driver (reconstructed from the mangled one-line paste in which
# statements were fused together, e.g. "lr=0.01num_epochs=10").
lr = 0.01            # learning rate
num_epochs = 10      # full passes over the data
batch_size = 10      # minibatch size
net = Linreg         # model
loss = squared_loss  # loss function

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        # One analytic-gradient SGD step per minibatch.  The original also
        # computed loss(net(X, w, b), y, batch_size) here and discarded the
        # result of l.sum(); that dead computation is removed.
        b, w = gradient_descent_runner(X, y, b, w, lr)
    # Monitor: loss over the full training set once per epoch.
    train_l = loss(net(features, w, b), labels, batch_size)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
完整代码如下:
# Complete, runnable linear-regression-from-scratch script, reconstructed
# from the mangled single-line paste (comments translated to English).
# Two fixes relative to the paste:
#   * step_gradient no longer carries a spurious factor 2 (squared_loss
#     already includes the 1/2), so the effective learning rate matches an
#     autograd implementation;
#   * the deprecated np.matrix is no longer used (vectorized NumPy instead
#     of a per-example Python loop).
import random

import numpy as np


# -------- construct the dataset --------
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise; the noise lets us verify the fit."""
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.matmul(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)  # additive observation noise
    return X, y.reshape((-1, 1))


true_w = (2, -3.4)
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)  # training data


# -------- minibatch iterator --------
def data_iter(batch_size, features, labels):
    """Yield shuffled (features, labels) minibatches of size batch_size."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # reshuffle so batches differ between epochs
    for start in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[start:min(start + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]


# -------- initialize model parameters --------
w = np.random.normal(0, 0.01, size=(2, 1))  # must match the feature dimension
b = np.zeros(1)


# -------- model --------
def Linreg(X, w, b):
    """Linear model: X @ w plus the scalar bias b[0]."""
    return np.matmul(X, w) + b[0]


# -------- loss --------
def squared_loss(y_hat, y, batch_size):
    """Elementwise squared loss (y_hat - y)^2 / 2, scaled by 1/batch_size."""
    return ((y_hat - y) ** 2 / 2) / batch_size


# -------- optimizer: one analytic gradient step --------
def step_gradient(b_current, w_current, features, labels, learningRate):
    """Return [new_b, new_w] after one gradient step on the given batch."""
    n = float(len(features))
    err = np.matmul(features, w_current) + b_current - labels  # (N, 1) residuals
    b_gradient = err.mean()
    w_gradient = np.matmul(features.T, err) / n
    new_b = b_current - learningRate * b_gradient
    new_w = w_current - learningRate * w_gradient
    return [new_b, new_w]


# -------- update wrapper --------
def gradient_descent_runner(features, labels, starting_b, starting_w, learning_rate):
    """Apply a single step_gradient update and return [b, w]."""
    return step_gradient(starting_b, starting_w, features, labels, learning_rate)


# -------- main: minibatch SGD training --------
lr = 0.03
num_epochs = 3
batch_size = 10
net = Linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        b, w = gradient_descent_runner(X, y, b, w, lr)
    train_l = loss(net(features, w, b), labels, batch_size)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

print(b, w)
三、实现过程中的问题
1.发现直接使用偏导公式和使用torch框架自动求导时收敛速度不同。（原因：手写梯度中多乘了系数2——squared_loss里已经含有1/2，这个多余的2相当于把学习率放大了一倍，因此收敛速度有差异。）