The code calls in the bacteria file to a z variable that can be called into other code. While also calling in our ggplot and MASS functions.
library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation
z <- read.csv("2008_bacprods_Kling.csv",header=TRUE,sep=",")
str(z)
## 'data.frame':    252 obs. of  6 variables:
##  $ SortChem           : chr  "2008-0020" "2008-0021" "2008-0022" "2008-0023" ...
##  $ Site               : chr  "Toolik Inlet Bay" "Toolik" "Toolik Southwest Basin" "Toolik Moraine" ...
##  $ Date               : chr  "26-May-08" "26-May-08" "26-May-08" "26-May-08" ...
##  $ Time_hr_.DST.      : chr  "11:00" "11:45" "12:19" "12:45" ...
##  $ Depth_m            : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ Bac_prod_ug_C_L_day: num  2.91 2.26 2.29 2.39 2.2 ...
summary(z)
##    SortChem             Site               Date           Time_hr_.DST.     
##  Length:252         Length:252         Length:252         Length:252        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Depth_m       Bac_prod_ug_C_L_day
##  Min.   : 0.010   Min.   :  0.220    
##  1st Qu.: 0.010   1st Qu.:  2.288    
##  Median : 1.000   Median :  4.940    
##  Mean   : 2.944   Mean   :  9.865    
##  3rd Qu.: 5.000   3rd Qu.:  7.230    
##  Max.   :16.000   Max.   :165.110
The code below plots the data into a histogram.
p1 <- ggplot(data=z, aes(x=Bac_prod_ug_C_L_day, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2)

The code is supposed to show how the density of the group is reflected in the histogram
p1 <-  p1 +  geom_density(linetype="dotted",size=0.75)

The code finds the means and the standard deviation of the data.
normPars <- fitdistr(z$Bac_prod_ug_C_L_day,"normal")
print(normPars)
##       mean          sd    
##    9.8645238   19.2252099 
##  ( 1.2110744) ( 0.8563589)
str(normPars)
## List of 5
##  $ estimate: Named num [1:2] 9.86 19.23
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 1.211 0.856
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 1.467 0 0 0.733
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 252
##  $ loglik  : num -1103
##  - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
##     mean 
## 9.864524
##       mean          sd    
##    9.8645238   19.2252099 
##  ( 1.2110744) ( 0.8563589)
The normal distribution of the data
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$Bac_prod_ug_C_L_day),len=length(z$Bac_prod_ug_C_L_day))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Bac_prod_ug_C_L_day), args = list(mean = meanML, sd = sdML))

Plots the data as if it was in an exponential graph
expoPars <- fitdistr(z$Bac_prod_ug_C_L_day,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Bac_prod_ug_C_L_day), args = list(rate=rateML))

Plots the data as if it was a uniform graph
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$Bac_prod_ug_C_L_day), args = list(min=min(z$Bac_prod_ug_C_L_day), max=max(z$Bac_prod_ug_C_L_day)))

Plots the data similarly like the exponential function.
gammaPars <- fitdistr(z$Bac_prod_ug_C_L_day,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Bac_prod_ug_C_L_day), args = list(shape=shapeML, rate=rateML))

Raw data is scaled to either zero or one.
pSpecial <- ggplot(data=z, aes(x=Bac_prod_ug_C_L_day/(max(Bac_prod_ug_C_L_day + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$Bac_prod_ug_C_L_day/max(z$Bac_prod_ug_C_L_day + 0.1),start=list(shape1=1,shape2=2),"beta")
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$Bac_prod_ug_C_L_day), args = list(shape1=shape1ML,shape2=shape2ML))

The best fitting distribution would have to be empirical density curve due to the way it follows the trend of the data.
This code makes a randomly generated data set of 200 specimens.
The code also finds the quartile ranges as well as the median and mean.
## 'data.frame':    1720 obs. of  2 variables:
##  $ ID   : int  1 3 4 5 7 9 12 15 16 17 ...
##  $ myVar: num  0.21 0.251 1.334 0.986 0.694 ...
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000533 0.355140 0.734071 0.867646 1.259622 3.242697
This code plots the random data set.
p1 <- ggplot(data=nd, aes(x=myVar, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)

This code creates the normal probability density which best fits the data.
normPars <- fitdistr(nd$myVar,"normal")
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(nd$myVar),len=length(nd$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(nd$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
