The code calls in the bacteria file to a z variable that can be called into other code. While also calling in our ggplot and MASS functions.
library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation
z <- read.csv("2008_bacprods_Kling.csv",header=TRUE,sep=",")
str(z)
## 'data.frame': 252 obs. of 6 variables:
## $ SortChem : chr "2008-0020" "2008-0021" "2008-0022" "2008-0023" ...
## $ Site : chr "Toolik Inlet Bay" "Toolik" "Toolik Southwest Basin" "Toolik Moraine" ...
## $ Date : chr "26-May-08" "26-May-08" "26-May-08" "26-May-08" ...
## $ Time_hr_.DST. : chr "11:00" "11:45" "12:19" "12:45" ...
## $ Depth_m : num 5 5 5 5 5 5 5 5 5 5 ...
## $ Bac_prod_ug_C_L_day: num 2.91 2.26 2.29 2.39 2.2 ...
summary(z)
## SortChem Site Date Time_hr_.DST.
## Length:252 Length:252 Length:252 Length:252
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Depth_m Bac_prod_ug_C_L_day
## Min. : 0.010 Min. : 0.220
## 1st Qu.: 0.010 1st Qu.: 2.288
## Median : 1.000 Median : 4.940
## Mean : 2.944 Mean : 9.865
## 3rd Qu.: 5.000 3rd Qu.: 7.230
## Max. :16.000 Max. :165.110
The code below plots the data into a histogram.
p1 <- ggplot(data=z, aes(x=Bac_prod_ug_C_L_day, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
The code is supposed to show how the density of the group is reflected in the histogram
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
The code finds the means and the standard deviation of the data.
normPars <- fitdistr(z$Bac_prod_ug_C_L_day,"normal")
print(normPars)
## mean sd
## 9.8645238 19.2252099
## ( 1.2110744) ( 0.8563589)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 9.86 19.23
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 1.211 0.856
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 1.467 0 0 0.733
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 252
## $ loglik : num -1103
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
## mean
## 9.864524
## mean sd
## 9.8645238 19.2252099
## ( 1.2110744) ( 0.8563589)
The normal distribution of the data
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$Bac_prod_ug_C_L_day),len=length(z$Bac_prod_ug_C_L_day))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Bac_prod_ug_C_L_day), args = list(mean = meanML, sd = sdML))
Plots the data as if it was in an exponential graph
expoPars <- fitdistr(z$Bac_prod_ug_C_L_day,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Bac_prod_ug_C_L_day), args = list(rate=rateML))
Plots the data as if it was a uniform graph
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$Bac_prod_ug_C_L_day), args = list(min=min(z$Bac_prod_ug_C_L_day), max=max(z$Bac_prod_ug_C_L_day)))
Plots the data similarly like the exponential function.
gammaPars <- fitdistr(z$Bac_prod_ug_C_L_day,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Bac_prod_ug_C_L_day), args = list(shape=shapeML, rate=rateML))
Raw data is scaled to either zero or one.
pSpecial <- ggplot(data=z, aes(x=Bac_prod_ug_C_L_day/(max(Bac_prod_ug_C_L_day + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$Bac_prod_ug_C_L_day/max(z$Bac_prod_ug_C_L_day + 0.1),start=list(shape1=1,shape2=2),"beta")
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$Bac_prod_ug_C_L_day), args = list(shape1=shape1ML,shape2=shape2ML))
The best fitting distribution would have to be empirical density curve due to the way it follows the trend of the data.
This code makes a randomly generated data set of 200 specimens.
The code also finds the quartile ranges as well as the median and mean.
## 'data.frame': 1720 obs. of 2 variables:
## $ ID : int 1 3 4 5 7 9 12 15 16 17 ...
## $ myVar: num 0.21 0.251 1.334 0.986 0.694 ...
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000533 0.355140 0.734071 0.867646 1.259622 3.242697
This code plots the random data set.
p1 <- ggplot(data=nd, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
This code creates the normal probability density which best fits the data.
normPars <- fitdistr(nd$myVar,"normal")
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(nd$myVar),len=length(nd$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(nd$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat