In [12]:
#  Step 1: simulate data
#  ?sample
#  100 Bernoulli trials
# sample(c(0,1), 100, replace = TRUE)
data <- data.frame(
  group = sample(c("A", "B"), 100, replace = TRUE),
  outcome = sample(c("Success", "Failure"), 100, replace = TRUE)
)
head(data)
tail(data)

Unnamed: 0_level_0,group,outcome
Unnamed: 0_level_1,<chr>,<chr>
1,A,Success
2,A,Failure
3,A,Failure
4,A,Failure
5,A,Failure
6,B,Failure


Unnamed: 0_level_0,group,outcome
Unnamed: 0_level_1,<chr>,<chr>
95,A,Failure
96,A,Failure
97,B,Success
98,A,Failure
99,B,Failure
100,A,Success


In [5]:
#  Step 2 Create a contingency table using xtabs()
#? xtabs

contingency_table <- xtabs(~ group + outcome, data = data)
contingency_table

     outcome
group Failure Success
    A      32      19
    B      29      20

In [13]:
# We would use `xtabs()` to create a contingency table and store it in the variable contingency_table.

# We then use `contingency_table` in the prop.test() function.
# Step 3: Now use this contingency table in prop.test()
#? prop.test
# prop.test can be used for testing the null that the proportions (probabilities of success) 
# in several groups are the same,
# or that they equal certain given values.
test_result <- prop.test(contingency_table)
test_result




	2-sample test for equality of proportions with continuity correction

data:  contingency_table
X-squared = 0.025584, df = 1, p-value = 0.8729
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.2467903  0.1755618
sample estimates:
   prop 1    prop 2 
0.3725490 0.4081633 


In [7]:
#  Step 2' Create a contingency table using xtabs()
# ? xtabs
data$outcome<-factor(data$outcome, levels = c("Success", "Failure"))

contingency_table <- xtabs(~ group + outcome, data = data)
contingency_table

     outcome
group Success Failure
    A      19      32
    B      20      29

In [16]:
successes <- table(data)
successes

     outcome
group Failure Success
    A      30      27
    B      22      21

In [17]:
n1 <- sum(data$group == "A")  # Total observations in group A
n2 <- sum(data$group == "B")  # Total observations in group B

n1
n2

In [None]:
# Step 3: You would first need to calculate p1 and p2 from your data
successes <- table(data)
n1 <- sum(data$group == "A")  # Total observations in group A
n2 <- sum(data$group == "B")  # Total observations in group B
p1 <- successes["A", "Success"] / n1  # Proportion of successes in group A
p2 <- successes["B", "Success"] / n2  # Proportion of successes in group B


# power_result1 is used when planning a study to determine how many participants 
#          you need to include to have a good chance of detecting an effect.
power_result1 <- power.prop.test(p1=p1, p2=p2, sig.level=0.05, power=0.8)
power_result1


# power_result2 is used after data collection to evaluate
#   whether the study had sufficient power to detect an effect given the sample sizes and the observed rates of success.
power_result2 <- power.prop.test(n=c(n1, n2), p1=p1, p2=p2, sig.level=0.05, power=NULL)
power_result2


     Two-sample comparison of proportions power calculation 

              n = 18163.75
             p1 = 0.4736842
             p2 = 0.4883721
      sig.level = 0.05
          power = 0.8
    alternative = two.sided

NOTE: n is number in *each* group



     Two-sample comparison of proportions power calculation 

              n = 57, 43
             p1 = 0.4736842
             p2 = 0.4883721
      sig.level = 0.05
          power = 0.03567667, 0.03408715
    alternative = two.sided

NOTE: n is number in *each* group


In [19]:
# More examples
####################################################
## This is already a contingency table in array form.
DF <- as.data.frame(UCBAdmissions)
## Now 'DF' is a data frame with a grid of the factors and the counts
## in variable 'Freq'.
DF

Admit,Gender,Dept,Freq
<fct>,<fct>,<fct>,<dbl>
Admitted,Male,A,512
Rejected,Male,A,313
Admitted,Female,A,89
Rejected,Female,A,19
Admitted,Male,B,353
Rejected,Male,B,207
Admitted,Female,B,17
Rejected,Female,B,8
Admitted,Male,C,120
Rejected,Male,C,205


In [20]:
## Nice for taking margins ...
xtabs(Freq ~ Gender + Admit, DF)
## And for testing independence ...
summary(xtabs(Freq ~ ., DF))

        Admit
Gender   Admitted Rejected
  Male       1198     1493
  Female      557     1278

Call: xtabs(formula = Freq ~ ., data = DF)
Number of cases in table: 4526 
Number of factors: 3 
Test for independence of all factors:
	Chisq = 2000.3, df = 16, p-value = 0

In [None]:
# Test of Equal or Given Proportions

heads <- rbinom(1, size = 100, prob = .5)
prop.test(heads, 100)          # continuity correction TRUE by default
prop.test(heads, 100, correct = FALSE)



	1-sample proportions test with continuity correction

data:  heads out of 100, null probability 0.5
X-squared = 0.81, df = 1, p-value = 0.3681
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
 0.3514281 0.5524574
sample estimates:
   p 
0.45 



	1-sample proportions test without continuity correction

data:  heads out of 100, null probability 0.5
X-squared = 1, df = 1, p-value = 0.3173
alternative hypothesis: true p is not equal to 0.5
95 percent confidence interval:
 0.3561454 0.5475540
sample estimates:
   p 
0.45 


In [None]:

## Data from Fleiss (1981), p. 139.
## H0: The null hypothesis is that the four populations from which
##     the patients were drawn have the same true proportion of smokers.
## A:  The alternative is that this proportion is different in at
##     least one of the populations.

smokers  <- c( 83, 90, 129, 70)
patients <- c( 86, 93, 136, 82)
prop.test(smokers, patients)


	4-sample test for equality of proportions without continuity correction

data:  smokers out of patients
X-squared = 12.6, df = 3, p-value = 0.005585
alternative hypothesis: two.sided
sample estimates:
   prop 1    prop 2    prop 3    prop 4 
0.9651163 0.9677419 0.9485294 0.8536585 
