e.g. flipping a coin
-
normal distirbution
def normal_approximation_to_binomial(n, p): """find mu and sigma corresponding to a binomial(n, p)""" mu = p * n sigma = math.sqrt(p * (1 - p) * n) return mu, sigma
-
whenever a random variable follows a normal distribution
# we can use `normal_cdf` # the normal cdf is the probability # the variable is below a threshold normal_probability_below = normal_cdf # it's above the threshold if it's not below the threshold def normal_probability_above(lo, mu=0, sigma=1): return 1 - normal_cdf(lo, mu, sigma) # it's between if it's less than hi, but not less than lo def normal_probability_between(lo, hi, mu=0, sigma=1): return normal_cdf(hi, mu, sigma) - normal_cdf(lo, mu, sigma) # it's outside if it's not between def normal_probability_outside(lo, hi, mu=0, sigma=1): return 1 - normal_probability_between(lo, hi, mu, sigma)
-
we can do the reverse
def normal_uper_bound(probability, mu=0, sigma=1): """returns the z for which P(Z <= z) = probability""" return inverse_normal_cdf(probability, mu, sigma) def normal_lower_bound(probability, mu=0, sigma=1): """returns the z for which P(Z >= z) = probability""" return inverse_normal_cdf(1 - probability, mu, sigma) def normal_two_sided_bounds(probability, mu=0, sigma=1): """returns the symmetric (about the mean) bounds that contain the specified probability""" tail_probability = (1 - probability) / 2 # upper bound should have tail_probability above it upper_bound = normal_lower_bound(tail_probability, mu, sigma) # lower bound should have tail_probability below it lower_bound = normal_upper_bound(tail_probability, mu, sigma) return lower_bound, upper_bound
-
filp coin n = 1000 times
mu_0, sigma_0 = normal_approximation_to_binomial(1000, 0.5) normal_two_sided_bounds(0.95, mu_0, sigma_0) # (469, 531)
-
calculate the power of the test
# 95% bounds based an assumption p is 0.5 lo, hi = normal_two_sided_bounds(0.95, mu_0, sigma_0) # actual mu and sigma based on p = 0.55 mu_1, sigma_1 = normal_approximation_to_binomial(1000, 0.55) # a type 2 error means we fail to reject the null hypothesis # which will happen when X is still in our original interval type_2_probability = normal_probability_between(lo, hi, mu_1, sigma_1) power = 1 - type_2_probability # 0.887
-