# !pip install numpy pandas statsmodels causalgraphicalmodels
import pandas as pd, numpy as np, statsmodels.api as sm, statsmodels.formula.api as smf
import sympy, matplotlib, math, random, networkx
from causalgraphicalmodels import CausalGraphicalModel
from statsmodels.iolib.summary2 import summary_col
# Scenario: Z is an observed common cause of X and Y (Z -> X, Z -> Y, X -> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[
        ("Z", "Y"),
        ("Z", "X"),
        ("X", "Y"),
    ],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The distribution of the variables does not really
# matter; the coefficient of X in the Y equation (1) is the true causal effect.
Z = np.random.normal(0, 1, size=(n, 1))
X = 0.5 * Z + np.random.normal(0, 1, size=(n, 1))
Y = X + 2 * Z + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# Wrong: omits the confounder Z, so the X coefficient absorbs part of Z's effect.
no_control = smf.ols("Y ~ X", data=data).fit()
# Correct: adjusting for Z closes the back-door path X <- Z -> Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z -> X -> Y, with an unobserved U driving both Z and Y (drawn as
# the latent edge Z <-> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[
        ("Z", "X"),
        ("X", "Y"),
    ],
    latent_edges=[("Z", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U = np.random.normal(0, 1, size=(n, 1))
Z = 2 * U + np.random.normal(0, 1, size=(n, 1))
X = 0.5 * Z + np.random.normal(0, 1, size=(n, 1))
Y = X + 2 * U + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U is kept for inspection
# only; the regressions below never use it).
D = np.hstack((U, Z, X, Y))
data = pd.DataFrame(D, columns=["U", "Z", "X", "Y"])

# Regressions.
# Wrong: leaves the back-door path X <- Z <- U -> Y open.
no_control = smf.ols("Y ~ X", data=data).fit()
# Adjusting for Z blocks that path even though U itself is unobserved.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z -> Y and X -> Y, with an unobserved U driving both Z and X
# (drawn as the latent edge Z <-> X).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[
        ("Z", "Y"),
        ("X", "Y"),
    ],
    latent_edges=[("Z", "X")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U = np.random.normal(0, 1, size=(n, 1))
Z = 2 * U + np.random.normal(0, 1, size=(n, 1))
X = 0.5 * U + np.random.normal(0, 1, size=(n, 1))
Y = X + 2 * Z + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U is kept for inspection
# only; the regressions below never use it).
D = np.hstack((U, Z, X, Y))
data = pd.DataFrame(D, columns=["U", "Z", "X", "Y"])

# Regressions.
# Wrong: leaves the back-door path X <- U -> Z -> Y open.
no_control = smf.ols("Y ~ X", data=data).fit()
# Adjusting for Z blocks that path even though U itself is unobserved.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X affects Y only through the mediator M (X -> M -> Y), and Z is an
# observed common cause of X and M.
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "M", "X"],
    edges=[
        ("Z", "M"),
        ("Z", "X"),
        ("X", "M"),
        ("M", "Y"),
    ],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The total effect of X on Y runs through M:
# 0.5 (X -> M) * 2 (M -> Y) = 1.
Z = np.random.normal(0, 1, size=(n, 1))
X = 0.5 * Z + np.random.normal(0, 1, size=(n, 1))
M = 1.5 * Z + 0.5 * X + np.random.normal(0, 1, size=(n, 1))
Y = 2 * M + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, M, Y))
data = pd.DataFrame(D, columns=["Z", "X", "M", "Y"])

# Regressions.
# Wrong: omits the confounder Z.
no_control = smf.ols("Y ~ X", data=data).fit()
# Adjusting for Z closes the back-door path X <- Z -> M -> Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()
# Wrong: conditioning on the mediator M blocks the only channel from X to Y,
# so this fit reports the direct effect of M rather than the effect of X.
using_m = smf.ols("Y ~ X + Z + M", data=data).fit()

# Side-by-side summary of the three fits.
dfoutput = summary_col([no_control, using_control, using_m], stars=True)
print(dfoutput)
# Scenario: Z -> X -> M -> Y, with an unobserved U driving both Z and M
# (drawn as the latent edge Z <-> M).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "M", "X"],
    edges=[
        ("Z", "X"),
        ("X", "M"),
        ("M", "Y"),
    ],
    latent_edges=[("Z", "M")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(123456)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The total effect of X on Y runs through M:
# 0.5 (X -> M) * 2 (M -> Y) = 1.
U = np.random.normal(0, 1, size=(n, 1))
Z = 2 * U + np.random.normal(0, 1, size=(n, 1))
X = 0.5 * Z + np.random.normal(0, 1, size=(n, 1))
M = 1.5 * U + 0.5 * X + np.random.normal(0, 1, size=(n, 1))
Y = 2 * M + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U is kept for inspection
# only; the regressions below never use it).
D = np.hstack((U, Z, X, M, Y))
data = pd.DataFrame(D, columns=["U", "Z", "X", "M", "Y"])

# Regressions.
# Wrong: leaves the back-door path X <- Z <- U -> M -> Y open.
no_control = smf.ols("Y ~ X", data=data).fit()
# Adjusting for Z blocks that path even though U itself is unobserved.
using_control = smf.ols("Y ~ X + Z", data=data).fit()
# Wrong: conditioning on the mediator M blocks the only channel from X to Y,
# so this fit reports the direct effect of M rather than the effect of X.
using_m = smf.ols("Y ~ X + Z + M", data=data).fit()

# Side-by-side summary of the three fits.
dfoutput = summary_col([no_control, using_control, using_m], stars=True)
print(dfoutput)
# Scenario: Z -> M, X -> M -> Y, with an unobserved U driving both X and Z
# (drawn as the latent edge X <-> Z).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "M", "X"],
    edges=[
        ("Z", "M"),
        ("X", "M"),
        ("M", "Y"),
    ],
    latent_edges=[("X", "Z")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(123456)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The total effect of X on Y runs through M:
# 0.5 (X -> M) * 2 (M -> Y) = 1.
U = np.random.normal(0, 1, size=(n, 1))
Z = 2 * U + np.random.normal(0, 1, size=(n, 1))
X = 0.5 * U + np.random.normal(0, 1, size=(n, 1))
M = 1.5 * Z + 0.5 * X + np.random.normal(0, 1, size=(n, 1))
Y = 2 * M + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U is kept for inspection
# only; the regressions below never use it).
D = np.hstack((U, Z, X, M, Y))
data = pd.DataFrame(D, columns=["U", "Z", "X", "M", "Y"])

# Regressions.
# Wrong: leaves the back-door path X <- U -> Z -> M -> Y open.
no_control = smf.ols("Y ~ X", data=data).fit()
# Adjusting for Z blocks that path even though U itself is unobserved.
using_control = smf.ols("Y ~ X + Z", data=data).fit()
# Wrong: conditioning on the mediator M blocks the only channel from X to Y,
# so this fit reports the direct effect of M rather than the effect of X.
using_m = smf.ols("Y ~ X + Z + M", data=data).fit()

# Side-by-side summary of the three fits.
dfoutput = summary_col([no_control, using_control, using_m], stars=True)
print(dfoutput)
# Scenario: X -> Y, with two unobserved variables: U_1 drives X and Z, U_2
# drives Z and Y (drawn as the latent edges X <-> Z and Z <-> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Y")],
    latent_edges=[("X", "Z"), ("Z", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(12345676)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U_1 = np.random.normal(0, 1, size=(n, 1))
U_2 = np.random.normal(0, 1, size=(n, 1))
Z = 0.5 * U_1 + 0.5 * U_2 + np.random.normal(0, 1, size=(n, 1))
X = 2 * U_1 + np.random.normal(0, 1, size=(n, 1))
Y = X + 2 * U_2 + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U_1 and U_2 are kept for
# inspection only; the regressions below never use them).
D = np.hstack((U_1, U_2, Z, X, Y))
data = pd.DataFrame(D, columns=["U_1", "U_2", "Z", "X", "Y"])

# Regressions.
# U_1 and U_2 are independent, so X and Y share no common cause here.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is a collider (U_1 -> Z <- U_2): conditioning on it opens the path
# X <- U_1 -> Z <- U_2 -> Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X -> Y and Z -> Y, with two unobserved variables: U_2 drives X and
# Z, U_1 drives Z and Y (drawn as the latent edges X <-> Z and Z <-> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Y"), ("Z", "Y")],
    latent_edges=[("X", "Z"), ("Z", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(12345676)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U_1 = np.random.normal(0, 1, size=(n, 1))
U_2 = np.random.normal(0, 1, size=(n, 1))
Z = 0.5 * U_1 + 0.5 * U_2 + np.random.normal(0, 1, size=(n, 1))
X = 2 * U_2 + np.random.normal(0, 1, size=(n, 1))
Y = X + 2 * U_1 + 2 * Z + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U_1 and U_2 are kept for
# inspection only; the regressions below never use them).
D = np.hstack((U_1, U_2, Z, X, Y))
data = pd.DataFrame(D, columns=["U_1", "U_2", "Z", "X", "Y"])

# Regressions.
# Without Z the path X <- U_2 -> Z -> Y stays open.
no_control = smf.ols("Y ~ X", data=data).fit()
# With Z that path is blocked, but Z is also a collider (U_2 -> Z <- U_1), so
# conditioning on it opens X <- U_2 -> Z <- U_1 -> Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z and X are independent causes of Y (Z -> Y, X -> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("Z", "Y"), ("X", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(12345676)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
Z = np.random.normal(0, 1, size=(n, 1))
X = np.random.normal(0, 1, size=(n, 1))
Y = X + 2 * Z + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# Z is drawn independently of X, so it does not confound the X coefficient.
no_control = smf.ols("Y ~ X", data=data).fit()
# Including Z removes its contribution from the residual variance of Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z affects Y only through X (Z -> X -> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("Z", "X"), ("X", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(12345676)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
Z = np.random.normal(0, 1, size=(n, 1))
X = 2 * Z + np.random.normal(0, 1, size=(n, 1))
Y = X + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# Z has no path to Y except through X, so it does not confound the estimate.
no_control = smf.ols("Y ~ X", data=data).fit()
# Including Z absorbs the part of X's variation that Z generates.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z -> X -> Y, with an unobserved U driving both X and Y (drawn as
# the latent edge X <-> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("Z", "X"), ("X", "Y")],
    latent_edges=[("X", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(12345676)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U = np.random.normal(0, 1, size=(n, 1))
Z = np.random.normal(0, 1, size=(n, 1))
X = 0.5 * Z + 2 * U + np.random.normal(0, 1, size=(n, 1))
Y = X + 3 * U + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U is kept for inspection
# only; the regressions below never use it).
D = np.hstack((U, Z, X, Y))
data = pd.DataFrame(D, columns=["U", "Z", "X", "Y"])

# Regressions.
# Neither fit adjusts for U, so the path X <- U -> Y is open in both.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z only drives X; conditioning on it removes variation in X that is unrelated
# to U, leaving the confounded part to dominate.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z mediates the whole effect of X on Y (X -> Z -> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Z"), ("Z", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The total effect of X on Y runs through Z:
# 0.5 (X -> Z) * 2 (Z -> Y) = 1.
X = np.random.normal(0, 1, size=(n, 1))
Z = 0.5 * X + np.random.normal(0, 1, size=(n, 1))
Y = 2 * Z + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# X has no causes in this model, so the plain regression has nothing to adjust for.
no_control = smf.ols("Y ~ X", data=data).fit()
# Conditioning on the mediator Z blocks the only channel from X to Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X -> M -> Y, and Z is another child of the mediator M (M -> Z).
sprinkler = CausalGraphicalModel(
    nodes=["M", "Z", "Y", "X"],
    edges=[("X", "M"), ("M", "Z"), ("M", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The total effect of X on Y runs through M:
# 4 (X -> M) * 0.25 (M -> Y) = 1.
X = np.random.normal(0, 1, size=(n, 1))
M = 4 * X + np.random.normal(0, 1, size=(n, 1))
Z = 0.5 * M + np.random.normal(0, 1, size=(n, 1))
Y = 0.25 * M + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (M is kept for inspection
# only; the regressions below never use it).
D = np.hstack((M, Z, X, Y))
data = pd.DataFrame(D, columns=["M", "Z", "X", "Y"])

# Regressions.
# X has no causes in this model, so the plain regression has nothing to adjust for.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is a noisy copy of the mediator M; conditioning on it partially blocks the
# X -> M -> Y channel.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X -> M -> Y, and Z is an independent second cause of the mediator
# M (Z -> M).
sprinkler = CausalGraphicalModel(
    nodes=["M", "Z", "Y", "X"],
    edges=[("X", "M"), ("Z", "M"), ("M", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process. The total effect of X on Y runs through M:
# 2 (X -> M) * 0.5 (M -> Y) = 1.
X = np.random.normal(0, 1, size=(n, 1))
Z = np.random.normal(0, 1, size=(n, 1))
M = 2 * X + 3 * Z + np.random.normal(0, 1, size=(n, 1))
Y = 0.5 * M + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (M is kept for inspection
# only; the regressions below never use it).
D = np.hstack((M, Z, X, Y))
data = pd.DataFrame(D, columns=["M", "Z", "X", "Y"])

# Regressions.
# Z is drawn independently of X, so it does not confound the X coefficient.
no_control = smf.ols("Y ~ X", data=data).fit()
# Including Z removes its contribution (through M) from the residual variance of Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X causes both Z and Y; Z has no effect on Y (X -> Z, X -> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Z"), ("X", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
X = np.random.normal(0, 1, size=(n, 1))
Z = 2 * X + np.random.normal(0, 1, size=(n, 1))
Y = X + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (built once; the original
# repeated this statement).
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# X has no causes in this model, so the plain regression has nothing to adjust for.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is a child of X that does not reach Y; including it adds a regressor that
# is strongly correlated with X.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X -> Z -> W and X -> Y, with an unobserved U driving both W and Y
# (drawn as the latent edge W <-> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X", "W"],
    edges=[("X", "Z"), ("Z", "W"), ("X", "Y")],
    latent_edges=[("W", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(1234567)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U = np.random.normal(0, 1, size=(n, 1))
X = np.random.normal(0, 1, size=(n, 1))
Z = 2 * X + np.random.normal(0, 1, size=(n, 1))
W = Z + 2 * U + np.random.normal(0, 1, size=(n, 1))
Y = X + U + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (W and U are kept for
# inspection only; the regressions below never use them).
D = np.hstack((Z, X, Y, W, U))
data = pd.DataFrame(D, columns=["Z", "X", "Y", "W", "U"])

# Regressions.
# U is drawn independently of X, so it does not confound the X coefficient.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is a child of X with no path to Y other than through the collider W,
# which is not conditioned on here.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: X -> Z and X -> Y, with an unobserved U driving both Z and Y
# (drawn as the latent edge Z <-> Y).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Z"), ("X", "Y")],
    latent_edges=[("Z", "Y")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(123456)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
U = np.random.normal(0, 1, size=(n, 1))
X = np.random.normal(0, 1, size=(n, 1))
Z = 2 * X + U + np.random.normal(0, 1, size=(n, 1))
Y = X + 1.5 * U + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe (U is kept for inspection
# only; the regressions below never use it).
D = np.hstack((Z, X, Y, U))
data = pd.DataFrame(D, columns=["Z", "X", "Y", "U"])

# Regressions.
# U is drawn independently of X, so it does not confound the X coefficient.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is a collider (X -> Z <- U): conditioning on it opens the path
# X -> Z <- U -> Y.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z is a common effect of X and Y (X -> Z, Y -> Z), alongside X -> Y.
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Z"), ("X", "Y"), ("Y", "Z")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(123456)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
X = np.random.normal(0, 1, size=(n, 1))
Y = X + np.random.normal(0, 1, size=(n, 1))
Z = 2 * X + 0.5 * Y + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# X has no causes in this model, so the plain regression has nothing to adjust for.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is generated from both X and Y; conditioning on this collider distorts the
# X coefficient.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)
# Scenario: Z is a child of the outcome (X -> Y -> Z).
sprinkler = CausalGraphicalModel(
    nodes=["Z", "Y", "X"],
    edges=[("X", "Y"), ("Y", "Z")],
)
sprinkler.draw()

# Seed the global NumPy generator so the simulated sample is reproducible.
np.random.seed(123456)
n = 1000  # sample size; every draw below is an (n, 1) column

# Data-generating process; the coefficient of X in the Y equation is 1.
X = np.random.normal(0, 1, size=(n, 1))
Y = X + np.random.normal(0, 1, size=(n, 1))
Z = 3 * Y + np.random.normal(0, 1, size=(n, 1))

# Collect the simulated columns into a dataframe.
D = np.hstack((Z, X, Y))
data = pd.DataFrame(D, columns=["Z", "X", "Y"])

# Regressions.
# X has no causes in this model, so the plain regression has nothing to adjust for.
no_control = smf.ols("Y ~ X", data=data).fit()
# Z is generated from Y itself; conditioning on a descendant of the outcome
# distorts the X coefficient.
using_control = smf.ols("Y ~ X + Z", data=data).fit()

# Side-by-side summary of both fits.
dfoutput = summary_col([no_control, using_control], stars=True)
print(dfoutput)