
Linear Regression with Pure NumPy in Python (Stochastic Gradient Descent)

Implementing linear regression with NumPy
I. Theoretical Derivation
The loss function is the squared error, averaged over the $n$ examples of a batch:

$$\ell(\mathbf{w}, b) = \frac{1}{n}\sum_{i=1}^{n}\frac{1}{2}\left(\mathbf{x}^{(i)\top}\mathbf{w} + b - y^{(i)}\right)^2$$
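As a sketch of the derivation behind the update rule implemented in step_gradient below, differentiating this loss with respect to w and b gives:

$$\frac{\partial \ell}{\partial \mathbf{w}} = \frac{1}{n}\sum_{i=1}^{n}\left(\mathbf{x}^{(i)\top}\mathbf{w} + b - y^{(i)}\right)\mathbf{x}^{(i)},\qquad \frac{\partial \ell}{\partial b} = \frac{1}{n}\sum_{i=1}^{n}\left(\mathbf{x}^{(i)\top}\mathbf{w} + b - y^{(i)}\right)$$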
II. Code Implementation
Import the numpy and random packages:

import random
import numpy as np

The main functions are as follows.
1. Construct the dataset

def synthetic_data(w, b, num_examples):
    # Generate y = Xw + b + noise (noise is added to verify that the model can still fit)
    # e.g. w = [2, -3.4], b = 4.2
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.matmul(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)   # add Gaussian noise
    return X, y.reshape((-1, 1))
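Using the same true parameters as in the complete code at the end (w = (2, -3.4), b = 4.2), the dataset can be generated and sanity-checked like this:

true_w = (2, -3.4)
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)   # training data
print(features.shape, labels.shape)   # (1000, 2) (1000, 1)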

2. Construct the data iterator

def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # shuffle the indices so batches are drawn in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
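For instance, the iterator can be checked by drawing a single mini-batch of 10 examples from the dataset built above (a quick shape check, not part of the original post):

for X, y in data_iter(10, features, labels):
    print(X.shape, y.shape)   # (10, 2) (10, 1)
    break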

3. Define the model

def Linreg(X, w, b):
    # linear model: y_hat = Xw + b
    return np.matmul(X, w) + b[0]
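The model needs initialized parameters before it can be called. As in the complete code at the end, w is drawn from a small normal distribution (its size must match the number of feature columns) and b starts at zero:

w = np.random.normal(0, 0.01, size=(2, 1))   # one weight per feature column
b = np.zeros(1)
y_hat = Linreg(features, w, b)   # predictions for the whole dataset, shape (1000, 1)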

4. Define the loss function

def squared_loss(y_hat, y, batch_size):
    # squared loss, averaged over the batch
    return ((y_hat - y) ** 2 / 2) / batch_size
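A quick numeric check with made-up values (not from the original post), to illustrate the averaging over batch_size:

y_hat = np.array([[1.0], [2.0]])
y = np.array([[1.5], [2.0]])
print(squared_loss(y_hat, y, batch_size=2).sum())   # ((0.5**2) / 2) / 2 = 0.0625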

5. Define the optimization algorithm

def step_gradient(b_current, w_current, features, labels, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(features))
    for i in range(len(features)):
        x = features[i, :]
        y = labels[i, :]
        # note: 2 * (...) is the gradient of (y_hat - y)**2, without the 1/2 used in squared_loss
        b_gradient += 2 * (np.dot(x, w_current) + b_current - y)
        w_gradient += 2 * x * (np.dot(x, w_current) + b_current - y)
    b_gradient = b_gradient / N
    w_gradient = w_gradient / N
    new_b = b_current - np.dot(learningRate, b_gradient)
    new_w = w_current - np.matrix(learningRate * w_gradient).T
    return [new_b, np.array(new_w)]
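A single manual step can be tried on one batch. The exact numbers depend on the random data, so only the shapes are shown here (this assumes w and b were initialized as in section 3):

b1, w1 = step_gradient(b, w, features[:10], labels[:10], 0.01)
print(b1.shape, w1.shape)   # (1,) (2, 1)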

6. Define the gradient update

def gradient_descent_runner(features, labels, starting_b, starting_w, learning_rate):
    # applies a single step_gradient update to the given batch
    b = starting_b
    w = starting_w
    b, w = step_gradient(b, w, features, labels, learning_rate)
    return [b, w]

7. Define the training loop

lr = 0.01
num_epochs = 10
batch_size = 10
net = Linreg
loss = squared_loss
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y, batch_size)
        l.sum()   # the loss itself is not used for the update; step_gradient recomputes the gradient
        b, w = gradient_descent_runner(X, y, b, w, lr)
    train_l = loss(net(features, w, b), labels, batch_size)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
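After the loop finishes, the learned parameters can be compared against the true values used to generate the data; with noise of standard deviation 0.01 they should end up close once training has converged:

print('estimated w:', w.reshape(-1), '  true w:', true_w)
print('estimated b:', b, '  true b:', true_b)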

The complete code is as follows:

import random
import numpy as np
# from d2l import torch as d2l

# -------- Construct the dataset -------- #
def synthetic_data(w, b, num_examples):
    # Generate y = Xw + b + noise (noise is added to verify that the model can still fit)
    # e.g. w = [2, -3.4], b = 4.2
    X = np.random.normal(0, 1, (num_examples, len(w)))
    y = np.matmul(X, w) + b
    y += np.random.normal(0, 0.01, y.shape)   # add Gaussian noise
    return X, y.reshape((-1, 1))

true_w = (2, -3.4)
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)   # training data

# -------- Construct the data iterator -------- #
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # shuffle the indices so batches are drawn in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]

# -------- Initialize the model parameters -------- #
w = np.random.normal(0, 0.01, size=(2, 1))   # size must match the feature dimension
b = np.zeros(1)

# -------- Define the model -------- #
def Linreg(X, w, b):
    return np.matmul(X, w) + b[0]

# -------- Define the loss function -------- #
def squared_loss(y_hat, y, batch_size):
    # squared loss, averaged over the batch
    return ((y_hat - y) ** 2 / 2) / batch_size

# -------- Define the optimization algorithm -------- #
def step_gradient(b_current, w_current, features, labels, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(features))
    for i in range(len(features)):
        x = features[i, :]
        y = labels[i, :]
        # note: 2 * (...) is the gradient of (y_hat - y)**2, without the 1/2 used in squared_loss
        b_gradient += 2 * (np.dot(x, w_current) + b_current - y)
        w_gradient += 2 * x * (np.dot(x, w_current) + b_current - y)
    b_gradient = b_gradient / N
    w_gradient = w_gradient / N
    new_b = b_current - np.dot(learningRate, b_gradient)
    new_w = w_current - np.matrix(learningRate * w_gradient).T
    return [new_b, np.array(new_w)]

# -------- Define the gradient update -------- #
def gradient_descent_runner(features, labels, starting_b, starting_w, learning_rate):
    b = starting_b
    w = starting_w
    b, w = step_gradient(b, w, features, labels, learning_rate)
    return [b, w]

# -------- Main training loop -------- #
lr = 0.03
num_epochs = 3
batch_size = 10
net = Linreg
loss = squared_loss
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y, batch_size)
        l.sum()
        b, w = gradient_descent_runner(X, y, b, w, lr)
    train_l = loss(net(features, w, b), labels, batch_size)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
print(b, w)

III. Issues Encountered During Implementation
1. The convergence speed turned out to differ between computing the gradient directly from the partial-derivative formula (as in step_gradient above) and letting the torch framework compute it with automatic differentiation. One plausible contributor is the constant factor: step_gradient implements the gradient of (y_hat - y)**2, while squared_loss carries an extra 1/2, so for the same learning rate the manual update is effectively twice as large.
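For comparison, below is a minimal sketch of the autograd version (assuming a standard torch SGD loop; this is not the author's original torch code, and the hyperparameters simply mirror the NumPy version above):

import torch

# hypothetical autograd counterpart: lr = 0.03, 3 epochs, batch size 10
w_t = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b_t = torch.zeros(1, requires_grad=True)
X_t = torch.tensor(features, dtype=torch.float32)
y_t = torch.tensor(labels, dtype=torch.float32)

for epoch in range(3):
    for i in range(0, len(X_t), 10):
        X_b, y_b = X_t[i:i + 10], y_t[i:i + 10]
        # same squared loss as above; autograd keeps the 1/2 factor in the gradient
        l = ((torch.matmul(X_b, w_t) + b_t - y_b) ** 2 / 2).mean()
        l.backward()
        with torch.no_grad():
            w_t -= 0.03 * w_t.grad
            b_t -= 0.03 * b_t.grad
            w_t.grad.zero_()
            b_t.grad.zero_()
    print(f'epoch {epoch + 1}, torch loss {l.item():f}')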
