Chapter 3 Linear Regression Models¶
3.1 Simple Linear Regression¶
Linear regression models
- Use linear fitting to uncover the patterns behind the data
- First build a linear regression model to find the trend line behind the scattered sample points
- Then use the fitted regression line for simple predictive analysis or causal analysis
3.1.1 Basic Mathematical Principles of Simple Linear Regression¶
A univariate linear regression model (also called a simple linear regression model) can be written as
$$y = ax + b$$
where $y$ is the dependent variable, $x$ is the independent variable, $a$ is the regression coefficient, and $b$ is the intercept.
In machine learning, the residual sum of squares is also called the model's loss function:
$$\sum(y^{(i)} - \hat{y}^{(i)})^2$$
or
$$\sum(y^{(i)} - (ax^{(i)} + b))^2$$
where $y^{(i)}$ is the actual value and $\hat{y}^{(i)}$ is the predicted value.
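As a quick illustration of the loss function above, the following minimal sketch evaluates the residual sum of squares for one candidate line on a handful of made-up sample points (the data and the candidate a, b are assumptions for demonstration only):

# a minimal sketch: compute the loss (residual sum of squares) of a candidate line y = a*x + b
xs = [1, 2, 4, 5]
ys = [2, 4, 6, 8]
a, b = 1.4, 0.8  # candidate slope and intercept (illustrative values)
loss = sum((y - (a * x + b)) ** 2 for x, y in zip(xs, ys))
print(loss)  # the smaller this value, the better the line fits the points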
3.1.2 Implementing Simple Linear Regression in Code¶
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LinearRegression
regr = LinearRegression() # create an untrained linear regression model named regr
X = [[1], [2], [4], [5]]
Y = [2, 4, 6, 8]
regr.fit(X, Y) # fit() trains the model; regr is now a fitted linear regression model
y = regr.predict([[1.5]])
print(y)
y = regr.predict([[1.5], [2.5], [4.5]])
print(y)
[2.9]
[2.9 4.3 7.1]
import matplotlib.pyplot as plt
plt.scatter(X, Y)
plt.plot(X, regr.predict(X))
plt.show()
# the coef_ and intercept_ attributes give the slope and intercept of the fitted trend line
print('coefficient a: ' + str(regr.coef_[0]))
print('intercept b: ' + str(regr.intercept_))
coefficient a: 1.4000000000000004
intercept b: 0.7999999999999989
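With a ≈ 1.4 and b ≈ 0.8, the earlier prediction for x = 1.5 can be reproduced by hand, a small check that is not part of the original walkthrough:

# verify the prediction manually: a * 1.5 + b should reproduce regr.predict([[1.5]])
print(regr.coef_[0] * 1.5 + regr.intercept_)  # about 2.9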
3.1.3 Case Study: A Linear Regression Model of Years of Experience and Salary Across Industries¶
import pandas as pd
df = pd.read_excel('IT行业收入表.xlsx')
df.head()
| | 工龄 | 薪水 |
|---|---|---|
| 0 | 0.0 | 10808 |
| 1 | 0.1 | 13611 |
| 2 | 0.2 | 12306 |
| 3 | 0.3 | 12151 |
| 4 | 0.3 | 13057 |
# 工龄 (years of experience) is the independent variable and 薪水 (salary) the dependent variable; select them as follows
X = df[['工龄']]
Y = df['薪水']
# draw a scatter plot of the data:
from matplotlib import pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # display Chinese labels correctly
plt.scatter(X,Y)
plt.xlabel('工龄')
plt.ylabel('薪水')
plt.show()
from sklearn.linear_model import LinearRegression
regr = LinearRegression() # create the model
regr.fit(X,Y) # train the model
LinearRegression()
plt.scatter(X,Y)
plt.plot(X, regr.predict(X), color='red') # color='red' draws the fitted line in red
plt.xlabel('工龄')
plt.ylabel('薪水')
plt.show()
print('coefficient a: ' + str(regr.coef_[0]))
print('intercept b: ' + str(regr.intercept_))
coefficient a: 2497.1513476046866
intercept b: 10143.131966873787
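As a quick follow-up (not in the original walkthrough), the fitted model can predict the salary for an assumed value of 5 years of experience; passing a DataFrame with the same column name as the training data avoids feature-name warnings:

# predict the salary for an assumed 5 years of experience
import pandas as pd
new_X = pd.DataFrame([[5]], columns=['工龄'])  # keep the same feature name used during training
print(regr.predict(new_X))  # roughly 2497.15 * 5 + 10143.13 ≈ 22629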
Supplementary Topic: Model Optimization¶
A univariate polynomial regression model (still linear in its coefficients), for example the quadratic model
$$y = ax^2 + bx + c$$
# generate quadratic-term features: each row becomes [1, x, x^2]
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=2)
X_ = poly_reg.fit_transform(X)
print(X_[0:5])
[[1.   0.   0.  ]
 [1.   0.1  0.01]
 [1.   0.2  0.04]
 [1.   0.3  0.09]
 [1.   0.3  0.09]]
# train the model
regr = LinearRegression()
regr.fit(X_, Y)
LinearRegression()
# visualization
plt.scatter(X,Y)
plt.plot(X, regr.predict(X_), color='red')
plt.show()
# print the coefficients and the constant term
print(regr.coef_) # coefficients for the constant, x and x^2 columns (the x^2 coefficient is a, the x coefficient is b)
print(regr.intercept_) # the constant term c
[   0.         -743.68080444  400.80398224]
13988.159332096886
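Reading off the output, the fitted curve is approximately $y = 400.80x^2 - 743.68x + 13988.16$ (the first coefficient is 0 because it belongs to the constant column generated by PolynomialFeatures, while the intercept is fitted separately). A sketch of predicting with the quadratic model follows; the 5-year value is an arbitrary example:

# predictions must go through the same polynomial transform as the training data
new_X = poly_reg.transform(pd.DataFrame([[5]], columns=['工龄']))
print(regr.predict(new_X))  # predicted salary for an assumed 5 years of experience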
3.2 Evaluating Linear Regression Models¶
Measuring the goodness of the linear fit
- R-squared
- Adj. R-squared

Measuring the significance of feature variables
- P-value (by common convention, a feature with a P-value below 0.05 is considered statistically significant)
3.2.1 Implementing Model Evaluation in Code¶
The mathematics behind model evaluation is fairly involved, so this part focuses on practical application.
# 1. read the data
import pandas as pd
df = pd.read_excel('IT行业收入表.xlsx')
X = df[['工龄']]
Y = df['薪水']
# 2. train the model
from sklearn.linear_model import LinearRegression
regr = LinearRegression()
regr.fit(X,Y)
# 3. visualize the model
from matplotlib import pyplot as plt
plt.scatter(X,Y)
plt.plot(X, regr.predict(X), color='red') # color='red' draws the fitted line in red
plt.xlabel('工龄')
plt.ylabel('薪水')
plt.show()
# 4. construct the linear regression equation
print('coefficient a: ' + str(regr.coef_[0]))
print('intercept b: ' + str(regr.intercept_))
coefficient a: 2497.1513476046866
intercept b: 10143.131966873787
import statsmodels.api as sm
X2 = sm.add_constant(X)
est = sm.OLS(Y, X2).fit()
est.summary() # outside Jupyter Notebook, write print(est.summary()) instead
| Dep. Variable: | 薪水 | R-squared: | 0.855 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.854 |
| Method: | Least Squares | F-statistic: | 578.5 |
| Date: | Fri, 14 Mar 2025 | Prob (F-statistic): | 6.69e-43 |
| Time: | 19:56:30 | Log-Likelihood: | -930.83 |
| No. Observations: | 100 | AIC: | 1866. |
| Df Residuals: | 98 | BIC: | 1871. |
| Df Model: | 1 | | |
| Covariance Type: | nonrobust | | |
| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | 1.014e+04 | 507.633 | 19.981 | 0.000 | 9135.751 | 1.12e+04 |
| 工龄 | 2497.1513 | 103.823 | 24.052 | 0.000 | 2291.118 | 2703.185 |
| Omnibus: | 0.287 | Durbin-Watson: | 0.555 |
|---|---|---|---|
| Prob(Omnibus): | 0.867 | Jarque-Bera (JB): | 0.463 |
| Skew: | 0.007 | Prob(JB): | 0.793 |
| Kurtosis: | 2.667 | Cond. No. | 9.49 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
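Besides reading the summary table, individual statistics can be accessed as attributes of the fitted results object; a minimal sketch:

# pull R-squared, adjusted R-squared and P-values directly from the results object
print(est.rsquared)      # R-squared, about 0.855
print(est.rsquared_adj)  # Adj. R-squared, about 0.854
print(est.pvalues)       # P-values of the constant term and of 工龄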
3.2.2 The Mathematics Behind Model Evaluation¶
R-squared
$$R^2 = 1 - \frac{\sum(y^{(i)} - \hat{y}^{(i)})^2}{\sum(y^{(i)} - \bar{y})^2}$$
Adj. R-squared
$$R^2_{adj} = 1 - \frac{(1 - R^2)(n - 1)}{n - k - 1}$$
where $n$ is the number of samples and $k$ is the number of feature variables. The larger $k$ is, the stronger its downward pull on $R^2_{adj}$, so do not add excessive feature variables just to chase a higher $R^2$.
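A minimal sketch of computing $R^2$ and $R^2_{adj}$ by hand for the salary model, following the formulas above (it assumes X, Y and the fitted regr from section 3.2.1 are still in scope):

# compute R-squared and adjusted R-squared manually for the one-feature salary model
import numpy as np
y_pred = regr.predict(X)
ss_res = np.sum((Y - y_pred) ** 2)    # residual sum of squares
ss_tot = np.sum((Y - Y.mean()) ** 2)  # total sum of squares
r2 = 1 - ss_res / ss_tot
n, k = len(Y), 1                      # sample size and number of features
r2_adj = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print(r2, r2_adj)                     # should be close to 0.855 and 0.854 in the summary table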
3.3 Multiple Linear Regression¶
The principle of multiple linear regression is essentially the same as that of simple linear regression.
3.3.1 Mathematical Principles and Code Implementation of Multiple Linear Regression¶
The optimization problem is
$$ \underset{\overrightarrow{\beta}}{\min} (\overrightarrow{y} - X\overrightarrow{\beta})^T (\overrightarrow{y} - X\overrightarrow{\beta}) $$
where $\overrightarrow{y}$ is a column vector of length $n$ (the number of samples), $\overrightarrow{\beta}$ is a column vector of length $k$ (the number of features), and $X$ is the $n \times k$ feature matrix.
Setting the derivative with respect to $\overrightarrow{\beta}$ to zero and solving:
$$ \begin{aligned} \frac{ \partial (\overrightarrow{y} - X\overrightarrow{\beta})^T (\overrightarrow{y} - X\overrightarrow{\beta})}{\partial \overrightarrow{\beta}} &= 0 \\ \frac{\partial (\overrightarrow{y} - X\overrightarrow{\beta})}{\partial \overrightarrow{\beta}} \cdot \frac{ \partial (\overrightarrow{y} - X\overrightarrow{\beta})^T (\overrightarrow{y} - X\overrightarrow{\beta})}{\partial (\overrightarrow{y} - X\overrightarrow{\beta})} &= 0 \\ -X^T \cdot 2 (\overrightarrow{y} - X\overrightarrow{\beta})&= 0 \\ X^T(\overrightarrow{y} - X\overrightarrow{\beta}) &=0 \\ X^T\overrightarrow{y} - X^TX\overrightarrow{\beta} &=0 \\ X^TX\overrightarrow{\beta} &=X^T\overrightarrow{y} \\ \overrightarrow{\beta} &=(X^TX)^{-1}X^T\overrightarrow{y} \\ \end{aligned} $$
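The closed-form solution can be verified numerically. The following sketch solves the normal equations $X^TX\overrightarrow{\beta} = X^T\overrightarrow{y}$ on the small made-up data set from section 3.1.2 (the column of ones models the intercept):

# a minimal numerical check of beta = (X^T X)^{-1} X^T y
import numpy as np
X_mat = np.array([[1, 1], [1, 2], [1, 4], [1, 5]], dtype=float)  # first column of ones for the intercept
y_vec = np.array([2, 4, 6, 8], dtype=float)
beta = np.linalg.solve(X_mat.T @ X_mat, X_mat.T @ y_vec)
print(beta)  # about [0.8, 1.4], matching the intercept b and slope a found in section 3.1.2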
3.3.2 Case Study: A Customer Value Prediction Model¶
import pandas as pd
df = pd.read_excel('客户价值数据表.xlsx')
df.head()
| | 客户价值 | 历史贷款金额 | 贷款次数 | 学历 | 月收入 | 性别 |
|---|---|---|---|---|---|---|
| 0 | 1150 | 6488 | 2 | 2 | 9567 | 1 |
| 1 | 1157 | 5194 | 4 | 2 | 10767 | 0 |
| 2 | 1163 | 7066 | 3 | 2 | 9317 | 0 |
| 3 | 983 | 3550 | 3 | 2 | 10517 | 0 |
| 4 | 1205 | 7847 | 3 | 3 | 11267 | 1 |
X = df[['历史贷款金额', '贷款次数', '学历', '月收入', '性别']]
Y = df['客户价值']
from sklearn.linear_model import LinearRegression
regr = LinearRegression()
regr.fit(X,Y)
LinearRegression()
print('coefficients: ' + str(regr.coef_))
print('constant term k0: ' + str(regr.intercept_))
coefficients: [5.71421731e-02 9.61723492e+01 1.13452022e+02 5.61326459e-02 1.97874093e+00]
constant term k0: -208.42004079958383
Here regr.coef_ returns a list of coefficients, one for each feature variable, i.e. k1, k2, k3, k4, and k5, so the fitted multiple linear regression equation is:
$y = -208.42 + 0.057x_1 + 96.17x_2 + 113.45x_3 + 0.056x_4 + 1.98x_5$
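A sketch of using the fitted multiple regression model for prediction; the feature values of the hypothetical new customer below are assumptions chosen only for illustration:

# predict the value of a hypothetical new customer with the fitted model
new_customer = pd.DataFrame([[6000, 3, 2, 10000, 1]],
                            columns=['历史贷款金额', '贷款次数', '学历', '月收入', '性别'])
print(regr.predict(new_customer))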
import statsmodels.api as sm # library used for linear regression model evaluation
X2 = sm.add_constant(X)
est = sm.OLS(Y, X2).fit()
est.summary()
| Dep. Variable: | 客户价值 | R-squared: | 0.571 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.553 |
| Method: | Least Squares | F-statistic: | 32.44 |
| Date: | Fri, 14 Mar 2025 | Prob (F-statistic): | 6.41e-21 |
| Time: | 19:56:30 | Log-Likelihood: | -843.50 |
| No. Observations: | 128 | AIC: | 1699. |
| Df Residuals: | 122 | BIC: | 1716. |
| Df Model: | 5 | | |
| Covariance Type: | nonrobust | | |
| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | -208.4200 | 163.810 | -1.272 | 0.206 | -532.699 | 115.859 |
| 历史贷款金额 | 0.0571 | 0.010 | 5.945 | 0.000 | 0.038 | 0.076 |
| 贷款次数 | 96.1723 | 25.962 | 3.704 | 0.000 | 44.778 | 147.567 |
| 学历 | 113.4520 | 37.909 | 2.993 | 0.003 | 38.406 | 188.498 |
| 月收入 | 0.0561 | 0.019 | 2.941 | 0.004 | 0.018 | 0.094 |
| 性别 | 1.9787 | 32.286 | 0.061 | 0.951 | -61.934 | 65.891 |
| Omnibus: | 1.597 | Durbin-Watson: | 2.155 |
|---|---|---|---|
| Prob(Omnibus): | 0.450 | Jarque-Bera (JB): | 1.538 |
| Skew: | 0.264 | Prob(JB): | 0.464 |
| Kurtosis: | 2.900 | Cond. No. | 1.28e+05 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.28e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
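The large condition number hints at possible multicollinearity among the features. One common follow-up check (not part of the original walkthrough) is the variance inflation factor; a hedged sketch using statsmodels:

# variance inflation factor (VIF) for each column of X2 (which includes the constant)
from statsmodels.stats.outliers_influence import variance_inflation_factor
vif = pd.DataFrame({
    'feature': X2.columns,
    'VIF': [variance_inflation_factor(X2.values, i) for i in range(X2.shape[1])]
})
print(vif)  # as a rough rule of thumb, VIF above 10 suggests strong multicollinearity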