# Silence all warnings for the rest of the session (a negative `warn`
# level makes R ignore them).
options(warn = -1)
The hypergeometric distribution models sampling without replacement: we take a ball from a cup and do not put it back, so each attempt at a “success” (here, success means getting a black ball, for example) is made from the remaining balls. The probability therefore depends on the previous draws. Let's create a vector of failures and successes (here the ones are successes, i.e. the desired outcome, such as getting a black ball).
# Twelve draws recorded as 0 (failure) or 1 (success); the successes sit at
# positions 3, 5, 6 and 8.
observations <- replace(numeric(12), c(3, 5, 6, 8), 1)
Define the hypergeometric probability function:
N : total number of observations (population size)
R : total number of desired outcomes (successes) in the vector
n : number of draws (trials)
y : number of desired outcomes (successes) wanted among the n draws
# Hypergeometric probability mass function: the probability of observing
# exactly `y` successes in `n` draws made without replacement.
#
# N : total number of items in the population
# R : number of success items in the population
# n : number of draws
# y : number of successes wanted among the draws
#
# Returns choose(R, y) * choose(N - R, n - y) / choose(N, n).
#
# The binomial coefficients are computed with choose() instead of ratios of
# factorials. choose() gives 0 when the lower index is negative or larger
# than the upper one, so impossible outcomes (y > n, y > R, or
# n - y > N - R) yield probability 0 rather than NaN. It also avoids the
# overflow of factorial() for populations larger than 170 items.
hyper_func <- function(N, R, n, y) {
  choose(R, y) * choose(N - R, n - y) / choose(N, n)
}
Suppose that we have 12 balls in a cup and 4 of them are black. What is the probability of getting a black ball on the first draw?
# Population of length(observations) balls, 4 of them black; one draw, one
# black ball wanted.
hyper_func(N = length(observations), R = 4, n = 1, y = 1)
## [1] 0.3333333
Let's look at the event from another angle: what is the probability of getting exactly one black ball in one draw, or in two draws?
# Print the probability of exactly one success for 1 draw and for 2 draws.
# The population size and success count are the same for every draw count,
# so they are computed once before the loop.
len <- length(observations)
sumx <- sum(observations)
for (i in seq_len(2)) {
  print(hyper_func(N = len, R = sumx, n = i, y = 1))
}
## [1] 0.3333333
## [1] 0.4848485
What is the probability of getting exactly one black ball when we make one to five draws?
# Print the probability of exactly one success for each draw count 1..5.
# The population size and success count are the same for every draw count,
# so they are computed once before the loop.
len <- length(observations)
sumx <- sum(observations)
for (i in seq_len(5)) {
  print(hyper_func(N = len, R = sumx, n = i, y = 1))
}
## [1] 0.3333333
## [1] 0.4848485
## [1] 0.5090909
## [1] 0.4525253
## [1] 0.3535354
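What if we make up to 12 draws? With only 8 non-black balls, drawing exactly one black ball becomes impossible from 10 draws onward, so the true probability for those cases is 0; any NaN shown there comes from evaluating a factorial at a negative number.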
# Print the probability of exactly one success for each draw count 1..12.
# The population size and success count are the same for every draw count,
# so they are computed once before the loop.
len <- length(observations)
sumx <- sum(observations)
for (i in seq_len(12)) {
  print(hyper_func(N = len, R = sumx, n = i, y = 1))
}
## [1] 0.3333333
## [1] 0.4848485
## [1] 0.5090909
## [1] 0.4525253
## [1] 0.3535354
## [1] 0.2424242
## [1] 0.1414141
## [1] 0.06464646
## [1] 0.01818182
## [1] NaN
## [1] NaN
## [1] NaN
So let's create another, larger vector that contains 100 failures and 10 successes.
# Population of 110 draws: 100 failures (0) followed by 10 successes (1).
Vec <- rep(c(0, 1), times = c(100, 10))
print(Vec)
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [36] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [71] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1
## [106] 1 1 1 1 1
Let's calculate it for this vector: what is the probability of getting exactly 3 black balls for each number of draws from 1 to 50?
# Probability of exactly 3 successes for every draw count from 1 to 50,
# stored in `result` and printed as it is computed.
# The population size and success count are loop invariants, and the output
# vector is preallocated instead of being grown one element at a time.
sumx <- sum(Vec)
len <- length(Vec)
result <- numeric(50)
for (i in seq_along(result)) {
  result[i] <- hyper_func(len, sumx, i, 3)
  print(result[i])
}
## [1] NaN
## [1] NaN
## [1] 0.0005560189
## [1] 0.002078575
## [1] 0.004853277
## [1] 0.009059451
## [1] 0.01478694
## [1] 0.0220512
## [1] 0.03080683
## [1] 0.04095958
## [1] 0.05237706
## [1] 0.06489817
## [1] 0.07834136
## [1] 0.09251183
## [1] 0.1072077
## [1] 0.1222255
## [1] 0.1373643
## [1] 0.1524301
## [1] 0.1672382
## [1] 0.1816161
## [1] 0.1954055
## [1] 0.2084634
## [1] 0.2206632
## [1] 0.2318957
## [1] 0.2420687
## [1] 0.2511076
## [1] 0.2589547
## [1] 0.2655689
## [1] 0.2709252
## [1] 0.2750132
## [1] 0.277837
## [1] 0.2794135
## [1] 0.2797718
## [1] 0.2789513
## [1] 0.2770014
## [1] 0.2739796
## [1] 0.2699505
## [1] 0.2649846
## [1] 0.2591574
## [1] 0.2525478
## [1] 0.2452372
## [1] 0.2373087
## [1] 0.2288458
## [1] 0.2199319
## [1] 0.2106491
## [1] 0.2010775
## [1] 0.191295
## [1] 0.181376
## [1] 0.1713914
## [1] 0.161408
For plotting: 1 to 50 draws, probability of getting 3 black balls
# Probability of exactly 3 successes for each draw count 1..50.
# Invariants are computed once and the result vector is preallocated rather
# than grown from NULL inside the loop.
sumx <- sum(Vec)
len <- length(Vec)
res1 <- numeric(50)
for (i in seq_along(res1)) {
  res1[i] <- hyper_func(len, sumx, i, 3)
}
plot 1
# res1 holds hypergeometric probabilities (computed by hyper_func), so the
# title and y-axis are labelled as hypergeometric, not binomial.
plot(res1, type = "b", main = "Hypergeometric Distribution",
     ylab = "Hypergeometric Probability", xlab = "Number of Trials",
     col = "blue")
50 draws to get 4 black balls
# Probability of exactly 4 successes for each draw count 1..50.
# Invariants are computed once and the result vector is preallocated rather
# than grown from NULL inside the loop.
sumx <- sum(Vec)
len <- length(Vec)
res2 <- numeric(50)
for (i in seq_along(res2)) {
  res2[i] <- hyper_func(len, sumx, i, 4)
}
plot 2
# res2 holds hypergeometric probabilities (computed by hyper_func), so the
# title and y-axis are labelled as hypergeometric, not binomial.
plot(res2, type = "b", main = "Hypergeometric Distribution",
     ylab = "Hypergeometric Probability", xlab = "Number of Trials",
     col = "green")
60 draws to get 4 black balls
# Probability of exactly 4 successes for each draw count 1..60.
# Invariants are computed once and the result vector is preallocated rather
# than grown from NULL inside the loop.
sumx <- sum(Vec)
len <- length(Vec)
res3 <- numeric(60)
for (i in seq_along(res3)) {
  res3[i] <- hyper_func(len, sumx, i, 4)
}
plot 3
# res3 holds hypergeometric probabilities (computed by hyper_func), so the
# title and y-axis are labelled as hypergeometric, not binomial.
plot(res3, type = "b", main = "Hypergeometric Distribution",
     ylab = "Hypergeometric Probability", xlab = "Number of Trials",
     col = "red")
# 80 draws to get 4 black balls
# Probability of exactly 4 successes for each draw count 1..80.
# Invariants are computed once and the result vector is preallocated rather
# than grown from NULL inside the loop.
sumx <- sum(Vec)
len <- length(Vec)
res4 <- numeric(80)
for (i in seq_along(res4)) {
  res4[i] <- hyper_func(len, sumx, i, 4)
}
plot 4
# res4 holds hypergeometric probabilities (computed by hyper_func), so the
# title and y-axis are labelled as hypergeometric, not binomial.
plot(res4, type = "b", main = "Hypergeometric Distribution",
     ylab = "Hypergeometric Probability", xlab = "Number of Trials",
     col = "red")
90 draws to get 4 black balls
# Probability of exactly 4 successes for each draw count 1..90.
# Invariants are computed once and the result vector is preallocated rather
# than grown from NULL inside the loop.
sumx <- sum(Vec)
len <- length(Vec)
res5 <- numeric(90)
for (i in seq_along(res5)) {
  res5[i] <- hyper_func(len, sumx, i, 4)
}
plot 5
# res5 holds hypergeometric probabilities (computed by hyper_func), so the
# title and y-axis are labelled as hypergeometric, not binomial.
plot(res5, type = "b", main = "Hypergeometric Distribution",
     ylab = "Hypergeometric Probability", xlab = "Number of Trials",
     col = "orange")