# Function to calculate the cost
def compute_cost(x, y, w, b):
    m = x.shape[0]
    cost = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = cost + (f_wb - y[i]) ** 2
    total_cost = 1 / (2 * m) * cost
    return total_cost
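# A quick sanity check of compute_cost on a tiny, made-up dataset (illustrative values,
# not from the lab): the data follow y = 2x + 1 exactly, so the cost at w=2, b=1 should be 0.
import numpy as np

x_tiny = np.array([1.0, 2.0, 3.0])
y_tiny = np.array([3.0, 5.0, 7.0])   # y = 2x + 1 exactly

print(compute_cost(x_tiny, y_tiny, w=2.0, b=1.0))  # expect 0.0
print(compute_cost(x_tiny, y_tiny, w=1.0, b=0.0))  # expect a positive cost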
# load the dataset
X_train, y_train = load_house_data()
X_features = ['size(sqft)', 'bedrooms', 'floors', 'age']

fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
for i in range(len(ax)):
    ax[i].scatter(X_train[:, i], y_train)
    ax[i].set_xlabel(X_features[i])
ax[0].set_ylabel("Price (1000's)")
plt.show()

# set alpha to 9.9e-7
_, _, hist = run_gradient_descent(X_train, y_train, 10, alpha=9.9e-7)
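# The timing comparison in the next cell uses a loop-based my_dot helper defined elsewhere
# in the lab. A minimal sketch of what it is assumed to look like (a plain element-by-element loop):
def my_dot(a, b):
    """Compute the dot product of two vectors with an explicit loop."""
    x = 0
    for i in range(a.shape[0]):
        x = x + a[i] * b[i]
    return x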
np.random.seed(1)
a = np.random.rand(10000000)  # very large arrays
b = np.random.rand(10000000)

tic = time.time()  # capture start time
c = np.dot(a, b)
toc = time.time()  # capture end time

print(f"np.dot(a, b) = {c:.4f}")
print(f"Vectorized version duration: {1000*(toc-tic):.4f} ms ")

tic = time.time()  # capture start time
c = my_dot(a, b)
toc = time.time()  # capture end time

print(f"my_dot(a, b) = {c:.4f}")
print(f"loop version duration: {1000*(toc-tic):.4f} ms ")

del(a); del(b)  # remove these big arrays from memory
import numpy as np

# Suppose you have a feature matrix X and a target vector y.
# Each row of X is one example and each column is one feature;
# y holds the target values.

def normal_equation(X, y):
    # Prepend a column of ones to X so the bias term is included in theta
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    # Solve the normal equation. pinv (pseudo-inverse) is used instead of inv here
    # because the example features below are collinear (x2 = x1 + 1), so X_b.T @ X_b
    # is singular and np.linalg.inv would raise an error
    theta = np.linalg.pinv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    return theta

# Example data
X = np.array([[1, 2], [2, 3], [3, 4]])  # feature matrix
y = np.array([5, 7, 9])                 # target values

theta = normal_equation(X, y)
print("Model parameters:", theta)
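# The plotting and logistic-regression cells below call a sigmoid helper that is not defined
# in this section. A minimal sketch, assuming the standard logistic function:
import numpy as np

def sigmoid(z):
    """Compute the logistic function g(z) = 1 / (1 + e^(-z)), element-wise."""
    return 1 / (1 + np.exp(-z))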
# Plot sigmoid(z) over a range of values from -10 to 10
z = np.arange(-10, 11)

fig, ax = plt.subplots(1, 1, figsize=(5, 3))
# Plot z vs sigmoid(z)
ax.plot(z, sigmoid(z), c="b")

ax.set_title("Sigmoid function")
ax.set_ylabel('sigmoid(z)')
ax.set_xlabel('z')
draw_vthresh(ax, 0)
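# The decision-boundary cell below also relies on a plot_data helper and on training data
# X, y defined earlier in the lab. A minimal stand-in for plot_data (illustrative only,
# not the course helper):
def plot_data(X, y, ax):
    """Scatter positive (y=1) and negative (y=0) examples with different markers."""
    pos = y == 1
    neg = y == 0
    ax.scatter(X[pos, 0], X[pos, 1], marker='x', c='r', label='y=1')
    ax.scatter(X[neg, 0], X[neg, 1], marker='o', facecolors='none', edgecolors='b', label='y=0')
    ax.legend()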
# Choose values between 0 and 6
x0 = np.arange(0, 6)
x1 = 3 - x0

fig, ax = plt.subplots(1, 1, figsize=(5, 4))
# Plot the decision boundary
ax.plot(x0, x1, c="b")
ax.axis([0, 4, 0, 3.5])

# Fill the region below the line
ax.fill_between(x0, x1, alpha=0.2)

# Plot the original data
plot_data(X, y, ax)
ax.set_ylabel(r'$x_1$')
ax.set_xlabel(r'$x_0$')
plt.show()
import copy, math

def gradient_descent(X, y, w_in, b_in, alpha, num_iters):
    """
    Performs batch gradient descent

    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : Initial values of model parameters
      b_in (scalar)       : Initial value of model parameter
      alpha (float)       : Learning rate
      num_iters (scalar)  : number of iterations to run gradient descent

    Returns:
      w (ndarray (n,))    : Updated values of parameters
      b (scalar)          : Updated value of parameter
    """
    # An array to store cost J at each iteration, primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying global w within function
    b = b_in

    for i in range(num_iters):
        # Calculate the gradient and update the parameters
        dj_db, dj_dw = compute_gradient_logistic(X, y, w, b)

        # Update parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J at each iteration
        if i < 100000:  # prevent resource exhaustion
            J_history.append(compute_cost_logistic(X, y, w, b))

        # Print the cost at intervals, 10 times over the run (or every iteration if num_iters < 10)
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]} ")

    return w, b, J_history  # return final w, b and J history for graphing
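# An illustrative run on a small two-feature dataset (values chosen here only for demonstration);
# it assumes the unregularized compute_cost_logistic and compute_gradient_logistic helpers from
# earlier in the lab are already defined.
import numpy as np

X_tmp = np.array([[0.5, 1.5], [1, 1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.zeros(X_tmp.shape[1])
b_tmp = 0.
alpha_tmp = 0.1

w_out, b_out, _ = gradient_descent(X_tmp, y_tmp, w_tmp, b_tmp, alpha_tmp, 10000)
print(f"updated parameters: w:{w_out}, b:{b_out}")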
def compute_cost_linear_reg(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples

    Args:
      X (ndarray (m,n)) : Data, m examples with n features
      y (ndarray (m,))  : target values
      w (ndarray (n,))  : model parameters
      b (scalar)        : model parameter
      lambda_ (scalar)  : Controls amount of regularization

    Returns:
      total_cost (scalar) : cost
    """
    m = X.shape[0]
    n = len(w)
    cost = 0.
    for i in range(m):
        f_wb_i = np.dot(X[i], w) + b          # (n,)(n,) = scalar, see np.dot
        cost = cost + (f_wb_i - y[i]) ** 2    # scalar
    cost = cost / (2 * m)                     # scalar

    reg_cost = 0
    for j in range(n):
        reg_cost += (w[j] ** 2)               # scalar
    reg_cost = (lambda_ / (2 * m)) * reg_cost # scalar

    total_cost = cost + reg_cost              # scalar
    return total_cost                         # scalar
def compute_gradient_linear_reg(X, y, w, b, lambda_):
    """
    Computes the gradient for linear regression

    Args:
      X (ndarray (m,n)) : Data, m examples with n features
      y (ndarray (m,))  : target values
      w (ndarray (n,))  : model parameters
      b (scalar)        : model parameter
      lambda_ (scalar)  : Controls amount of regularization

    Returns:
      dj_dw (ndarray (n,)) : The gradient of the cost w.r.t. the parameters w
      dj_db (scalar)       : The gradient of the cost w.r.t. the parameter b
    """
    m, n = X.shape  # (number of examples, number of features)
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        err = (np.dot(X[i], w) + b) - y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
        dj_db = dj_db + err
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    # Add the regularization term to the gradient of w
    for j in range(n):
        dj_dw[j] = dj_dw[j] + (lambda_ / m) * w[j]

    return dj_db, dj_dw
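# An illustrative check of the two regularized linear-regression routines above on small
# random data (shapes and lambda_ chosen here for demonstration only).
import numpy as np

np.random.seed(1)
X_tmp = np.random.rand(5, 6)
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5
b_tmp = 0.5
lambda_tmp = 0.7

print("Regularized cost:", compute_cost_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp))
dj_db_tmp, dj_dw_tmp = compute_gradient_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)
print("dj_db:", dj_db_tmp)
print("dj_dw:", dj_dw_tmp)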
def compute_cost_logistic_reg(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples

    Args:
      X (ndarray (m,n)) : Data, m examples with n features
      y (ndarray (m,))  : target values
      w (ndarray (n,))  : model parameters
      b (scalar)        : model parameter
      lambda_ (scalar)  : Controls amount of regularization

    Returns:
      total_cost (scalar) : cost
    """
    m, n = X.shape
    cost = 0.
    for i in range(m):
        z_i = np.dot(X[i], w) + b                                           # (n,)(n,) = scalar, see np.dot
        f_wb_i = sigmoid(z_i)                                               # scalar
        cost += -y[i] * np.log(f_wb_i) - (1 - y[i]) * np.log(1 - f_wb_i)    # scalar
    cost = cost / m                                                         # scalar

    reg_cost = 0
    for j in range(n):
        reg_cost += (w[j] ** 2)               # scalar
    reg_cost = (lambda_ / (2 * m)) * reg_cost # scalar

    total_cost = cost + reg_cost              # scalar
    return total_cost                         # scalar
def compute_gradient_logistic_reg(X, y, w, b, lambda_):
    """
    Computes the gradient for logistic regression

    Args:
      X (ndarray (m,n)) : Data, m examples with n features
      y (ndarray (m,))  : target values
      w (ndarray (n,))  : model parameters
      b (scalar)        : model parameter
      lambda_ (scalar)  : Controls amount of regularization

    Returns:
      dj_dw (ndarray (n,)) : The gradient of the cost w.r.t. the parameters w
      dj_db (scalar)       : The gradient of the cost w.r.t. the parameter b
    """
    m, n = X.shape
    dj_dw = np.zeros((n,))  # (n,)
    dj_db = 0.0             # scalar

    for i in range(m):
        f_wb_i = sigmoid(np.dot(X[i], w) + b)      # (n,)(n,) = scalar
        err_i = f_wb_i - y[i]                      # scalar
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err_i * X[i, j]  # scalar
        dj_db = dj_db + err_i
    dj_dw = dj_dw / m   # (n,)
    dj_db = dj_db / m   # scalar

    # Add the regularization term to the gradient of w
    for j in range(n):
        dj_dw[j] = dj_dw[j] + (lambda_ / m) * w[j]

    return dj_db, dj_dw
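# Similarly, an illustrative check of the regularized logistic-regression routines above
# (again with arbitrary small random data; assumes the sigmoid helper sketched earlier).
import numpy as np

np.random.seed(1)
X_tmp = np.random.rand(5, 3)
y_tmp = np.array([0, 1, 0, 1, 0])
w_tmp = np.random.rand(X_tmp.shape[1]) - 0.5
b_tmp = 0.5
lambda_tmp = 0.7

print("Regularized cost:", compute_cost_logistic_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp))
dj_db_tmp, dj_dw_tmp = compute_gradient_logistic_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)
print("dj_db:", dj_db_tmp)
print("dj_dw:", dj_dw_tmp)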