!pip install statsmodels==0.14.0
Requirement already satisfied: statsmodels==0.14.0 in /root/venv/lib/python3.9/site-packages (0.14.0)
Requirement already satisfied: pandas>=1.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels==0.14.0) (1.2.5)
Requirement already satisfied: packaging>=21.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from statsmodels==0.14.0) (21.3)
Requirement already satisfied: numpy>=1.18 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels==0.14.0) (1.23.4)
Requirement already satisfied: scipy!=1.9.2,>=1.4 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels==0.14.0) (1.9.3)
Requirement already satisfied: patsy>=0.5.2 in /root/venv/lib/python3.9/site-packages (from statsmodels==0.14.0) (0.5.3)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from packaging>=21.3->statsmodels==0.14.0) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from pandas>=1.0->statsmodels==0.14.0) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas>=1.0->statsmodels==0.14.0) (2022.5)
Requirement already satisfied: six in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from patsy>=0.5.2->statsmodels==0.14.0) (1.16.0)
WARNING: You are using pip version 22.0.4; however, version 23.1.2 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats
# Generate some random data
np.random.seed(123)
n = 100
x = np.random.normal(size=n)
x2 = x ** 2
y = 2 + 3 * x + 4 * x2 + np.random.normal(size=n)
# Fit the model
X = sm.add_constant(np.column_stack((x, x2)))
model = sm.OLS(y, X).fit()
# Define the range of x values to plot
x_range = np.linspace(x.min(), x.max(), 100)
x2_range = x_range ** 2
X_range = sm.add_constant(np.column_stack((x_range, x2_range)))
# Get the predicted values and standard errors for the range of x values
y_pred = model.predict(X_range)
y_std = model.get_prediction(X_range).se_mean
# Plot the data and regression line with confidence intervals
fig, ax = plt.subplots()
ax.scatter(x, y, alpha=0.5)
ax.plot(x_range, y_pred, color='b', label='Regression line')
ax.fill_between(x_range, y_pred - scipy.stats.t.ppf(q=0.975,df=n-3) * y_std, y_pred + scipy.stats.t.ppf(q=0.975,df=n-3) * y_std, color='gray', alpha=0.2, label='95% confidence interval')
ax.legend(loc='upper left')
plt.ylabel('Y')
plt.xlabel('X')
plt.show()
# Calculate the marginal effect of x on y for each value of x in the range
marginal_effect = model.params[1] + 2 * model.params[2] * x_range
# Calculate the standard error of the marginal effect for each value of x in the range
se = model.bse[1] + 2 * x_range * model.bse[2]
# Calculate the upper and lower bounds of the 95% confidence interval for each value of x in the range
upper_ci = marginal_effect + scipy.stats.t.ppf(q=0.975,df=n-3) * se
lower_ci = marginal_effect - scipy.stats.t.ppf(q=0.975,df=n-3) * se
# Plot the marginal effect with 95% confidence intervals
plt.plot(x_range, marginal_effect, color='blue', linewidth=2)
plt.fill_between(x_range, lower_ci, upper_ci, color='gray', alpha=0.3)
plt.xlabel('x')
plt.ylabel('Marginal effect of x on y')
plt.axhline(0, color='red', linestyle='--')
plt.show()
!pip install statsmodels==0.14.0
Requirement already satisfied: statsmodels==0.14.0 in /root/venv/lib/python3.9/site-packages (0.14.0)
Requirement already satisfied: scipy!=1.9.2,>=1.4 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels==0.14.0) (1.9.3)
Requirement already satisfied: packaging>=21.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from statsmodels==0.14.0) (21.3)
Requirement already satisfied: patsy>=0.5.2 in /root/venv/lib/python3.9/site-packages (from statsmodels==0.14.0) (0.5.3)
Requirement already satisfied: pandas>=1.0 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels==0.14.0) (1.2.5)
Requirement already satisfied: numpy>=1.18 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from statsmodels==0.14.0) (1.23.4)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from packaging>=21.3->statsmodels==0.14.0) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from pandas>=1.0->statsmodels==0.14.0) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.9/py/lib/python3.9/site-packages (from pandas>=1.0->statsmodels==0.14.0) (2022.5)
Requirement already satisfied: six in /shared-libs/python3.9/py-core/lib/python3.9/site-packages (from patsy>=0.5.2->statsmodels==0.14.0) (1.16.0)
WARNING: You are using pip version 22.0.4; however, version 23.1.2 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
# Generate some random data
np.random.seed(123)
n = 100
x = np.random.normal(size=n)
z = np.random.normal(size=n)
xz = x * z
y = 2 + 3 * x + 2 * z + 4 * xz + np.random.normal(size=n)
# Fit the model
X = sm.add_constant(np.column_stack((x, z, xz)))
model = sm.OLS(y, X).fit()
# Define the range of x and z values to plot
x_range = np.linspace(x.min(), x.max(), 50)
z_range = np.linspace(z.min(), z.max(), 50)
xx, zz = np.meshgrid(x_range, z_range)
xxz = xx * zz
# Create a meshgrid of x, z, and xz values
X_range = np.column_stack((xx.flatten(), zz.flatten(), xxz.flatten()))
X_range = sm.add_constant(X_range)
# Predict the values of y on the meshgrid of x, z, and xz values
y_pred = model.predict(X_range)
# Reshape the predicted values to match the shape of the meshgrid
y_pred = y_pred.reshape(xx.shape)
# Create a 3D plot of the regression plane
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(xx, zz, y_pred, cmap='viridis')
ax.set_xlabel('x')
ax.set_ylabel('z')
ax.set_zlabel('y')
plt.show()
# Calculate the marginal effect of x on y for each value of z in the range
marginal_effect = model.params[1] + model.params[3] * z_range
# Calculate the standard error of the marginal effect for each value of x in the range
se = model.bse[1] + 2 * z_range * model.bse[3]
# Calculate the upper and lower bounds of the 95% confidence interval for each value of x in the range
upper_ci = marginal_effect + scipy.stats.t.ppf(q=0.975,df=n-4) * se
lower_ci = marginal_effect - scipy.stats.t.ppf(q=0.975,df=n-4) * se
# Plot the marginal effect with 95% confidence intervals
plt.plot(z_range, marginal_effect, color='blue', linewidth=2)
plt.fill_between(z_range, lower_ci, upper_ci, color='gray', alpha=0.3)
plt.xlabel('z')
plt.ylabel('Marginal effect of x on y')
plt.show()
df = pd.DataFrame({'y': y, 'x': x, 'z': z})
df.to_csv('data_margins.csv', index=False)