import numpy as np
import scipy.stats as st
from matplotlib import pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
Undergraduate statistics classes in the social sciences revolve mainly around p-values. Despite that, I have never seen p-value distributions mentioned. So, here we go:
What does the p-value distribution look like when there is no effect?
p_val_list = []  # a bag to hold p-values

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=51)  # sampling from a normal distribution with mean 100, std 15
    trt = np.random.normal(loc=100, scale=15, size=51)   # sampling from a normal distribution with mean 100, std 15
    p_val = st.ttest_ind(trt, ctrl)[1]  # doing a t-test and grabbing the p-value
    p_val_list.append(p_val)  # storing the p-value in the bag
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_ylim(0, 1000000)
plt.title('When the true effect size = 0')
[Figure: histogram of one million p-values, "When the true effect size = 0"]
Every p-value is equally likely: under the null, the p-value is uniformly distributed. That's why the chosen alpha level corresponds to how often you will fool yourself in the long run when there is no effect. If the chosen alpha level is .10, then under the null hypothesis 10 percent of the p-values fall below 0.10.
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.05
This is our long-run Type I error rate; power is technically undefined in this case (i.e., the null is the truth). We don't expect to fool ourselves more than 5% of the time in the long run when there is no effect.
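Since the p-value is uniform under the null, the fraction of p-values at or below any threshold should match the threshold itself. A quick sketch of that check on the simulated null p-values above:

for alpha in [0.10, 0.05, 0.01]:
    frac = len([val for val in p_val_list if val <= alpha]) / len(p_val_list)
    print(f'alpha = {alpha:.2f} -> long-run rejection rate = {frac:.3f}')  # should be close to alpha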
Let’s see what happens when there is an effect:
p_val_list = []

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=51)
    trt = np.random.normal(loc=104.5, scale=15, size=51)
    p_val = st.ttest_ind(trt, ctrl)[1]
    p_val_list.append(p_val)
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--')  # expected per-bin count under the null (1,000,000 p-values / 100 bins)

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.3')
[Figure: histogram of p-values, "For Cohen's d of 0.3"]
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.32
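These simulated power figures can be cross-checked analytically. Here is a minimal sketch using statsmodels (an extra import, not used elsewhere in this post); the mean shifts of 4.5, 7.5, 9.5, and 12 points on an SD-15 scale correspond to d = 0.3, 0.5, ≈0.63, and 0.8, and the analytic values should land close to the simulated ones:

from statsmodels.stats.power import TTestIndPower

analysis = TTestIndPower()
for d in [4.5 / 15, 7.5 / 15, 9.5 / 15, 12 / 15]:  # the effect sizes simulated in this post
    print(f'd = {d:.2f} -> power = {analysis.power(effect_size=d, nobs1=51, alpha=0.05):.2f}')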
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist([val for val in p_val_list if val <= .05], bins=5, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--')

# ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xlim(0, 0.05)
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.3')
[Figure: p-values at or below .05, "For Cohen's d of 0.3"]
p_val_list = []

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=51)
    trt = np.random.normal(loc=107.5, scale=15, size=51)
    p_val = st.ttest_ind(trt, ctrl)[1]
    p_val_list.append(p_val)
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--', alpha=.5)

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.5')
[Figure: histogram of p-values, "For Cohen's d of 0.5"]
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.71
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist([val for val in p_val_list if val <= .05], bins=5, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--')

# ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xlim(0, 0.05)
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.5')
[Figure: p-values at or below .05, "For Cohen's d of 0.5"]
p_val_list = []

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=51)
    trt = np.random.normal(loc=109.5, scale=15, size=51)
    p_val = st.ttest_ind(trt, ctrl)[1]
    p_val_list.append(p_val)
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--', alpha=.5)

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of ~0.63')  # 9.5 / 15 ≈ 0.63
[Figure: histogram of p-values, "For Cohen's d of ~0.63"]
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.89
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist([val for val in p_val_list if val <= .05], bins=5, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--')

# ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xlim(0, 0.05)
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of ~0.63')
[Figure: p-values at or below .05, "For Cohen's d of ~0.63"]
# fraction of all p-values at or below .01
print(len([val for val in p_val_list if val >= .00 and val <= .01]) / len(p_val_list))
0.715716
p_val_list = []

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=51)
    trt = np.random.normal(loc=112, scale=15, size=51)
    p_val = st.ttest_ind(trt, ctrl)[1]
    p_val_list.append(p_val)
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--')

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xticks(np.arange(0, 1, 0.05))
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.8')
[Figure: histogram of p-values, "For Cohen's d of 0.8"]
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist([val for val in p_val_list if val <= .05], bins=5, edgecolor='black', linewidth=.9)
ax.axhline(y=10000, color='r', linestyle='--')

# ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xlim(0, 0.05)
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.8')
[Figure: p-values at or below .05, "For Cohen's d of 0.8"]
# fraction of p-values falling in each .01-wide bin below .05
print(len([val for val in p_val_list if val >= .04 and val <= .05]) / len(p_val_list), '\n',
len([val for val in p_val_list if val >= .03 and val <= .04]) / len(p_val_list), '\n',
len([val for val in p_val_list if val >= .02 and val <= .03]) / len(p_val_list), '\n',
len([val for val in p_val_list if val >= .01 and val <= .02]) / len(p_val_list), '\n',
len([val for val in p_val_list if val >= .00 and val <= .01]) / len(p_val_list))
0.005389
0.008187
0.014401
0.032377
0.91928
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.98
As statistical power increases, the distribution of p-values piles up at the far left: the p-values closest to 0.00 become more and more likely. And when power is very high, the relatively high p-values below 0.05 actually become more likely under the null than under the alternative: here, only about 0.5% of the p-values landed between .04 and .05, whereas under the null that bin holds a full 1%. Hence, it wouldn't be wise to reject the null in that region despite the p-value being below .05.
p_val_list = []

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=65)  # different sample size
    trt = np.random.normal(loc=107.5, scale=15, size=65)  # different sample size
    p_val = st.ttest_ind(trt, ctrl)[1]
    p_val_list.append(p_val)
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.81
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xticks(np.arange(0, 1, 0.05))
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.5')
[Figure: histogram of p-values, "For Cohen's d of 0.5", n = 65 per group]
p_val_list = []

for i in range(0, 1000000):
    ctrl = np.random.normal(loc=100, scale=15, size=100)  # different sample size
    trt = np.random.normal(loc=107.5, scale=15, size=100)  # different sample size
    p_val = st.ttest_ind(trt, ctrl)[1]
    p_val_list.append(p_val)
power = [val for val in p_val_list if val <= 0.05]  # .05 is the chosen alpha
print('Power: ', round(len(power) / len(p_val_list), 2))
Power: 0.94
fig = plt.figure(figsize=(9, 5))
ax = fig.add_subplot(1, 1, 1)

plt.ticklabel_format(style='plain', axis='y')

ax.hist(p_val_list, bins=100, edgecolor='black', linewidth=.9)

ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.tick_params(which='both', width=2)
ax.tick_params(which='major', length=7)
ax.tick_params(which='minor', length=4)

ax.set_xticks(np.arange(0, 1, 0.05))
ax.set_ylim(0, 1000000)
plt.title('For Cohen\'s d of 0.5')
[Figure: histogram of p-values, "For Cohen's d of 0.5", n = 100 per group]
As one can see, statistical power also depends on the sample size, and this should make sense: the p-value is a function of the test statistic (t, in this case), which in turn is a function of the sample size. If this doesn't feel intuitive, I suggest you take a look at this post and check what happens to the standard error as the sample size increases.
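A quick sketch of that last point: for a fixed mean difference, the standard error of the difference shrinks as the sample size grows, so the same effect yields a larger expected t statistic (the numbers here mirror the d = 0.5 scenario above):

sigma, mean_diff = 15, 7.5  # the d = 0.5 scenario
for n in [51, 65, 100]:
    se_diff = sigma * np.sqrt(2 / n)  # standard error of the mean difference, equal group sizes
    print(f'n = {n:>3} per group -> SE = {se_diff:.2f}, expected t ~ {mean_diff / se_diff:.2f}')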
In the examples above, we always knew the underlying distributions of the populations we sampled from. In reality, we usually end up with a result and wonder about the true effect: what is the chance that there is an effect, given that I observed one? This conditional probability is called the positive predictive value, something I first heard about from a political scientist in pre-COVID days, and if you're interested in this stuff I suggest you take a look at it. You can start with Ioannidis' paper or Lakens' book for a more structured follow-up.
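For a taste of that calculation, here is a minimal sketch under assumed numbers (the 50% prior and 80% power are purely illustrative, not taken from the simulations above):

prior, power_, alpha = 0.5, 0.8, 0.05  # illustrative assumptions
ppv = (power_ * prior) / (power_ * prior + alpha * (1 - prior))
print('Positive predictive value:', round(ppv, 2))  # P(effect is real | p <= alpha)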