The hypothesis tested here is that bird counts differ among three different field (crop) types.
# loading packages
library(ggplot2)
library(MASS)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
################### true data #############################
# Load the field survey (comma-separated, first row holds the column names)
# and show the structure of the resulting data frame.
z <- read.table(
  file = "fdata.csv",
  sep = ",",
  header = TRUE
)
str(z)
## 'data.frame': 817 obs. of 19 variables:
## $ farm.code : chr "GBN 8" "GBN 8" "GBN 8" "GBN 8" ...
## $ before.after : chr "after" "after" "after" "after" ...
## $ site : chr "rizek" "rizek" "rizek" "rizek" ...
## $ session : int 4 4 4 4 4 4 4 4 4 4 ...
## $ crop.type : chr "Legume" "Legume" "Legume" "Legume" ...
## $ species : chr "Whinchat" "Plain backed pipit" "Purple glossy starling" "Laughing dove" ...
## $ guild : chr "insectivore" "insectivore" "insectivore" "granivore" ...
## $ number : int 2 2 1 1 2 1 6 2 1 1 ...
## $ area : int 522 522 522 522 522 522 522 522 522 522 ...
## $ N_trees : int 1 1 1 1 1 1 1 1 1 1 ...
## $ tree.density : num 0.00192 0.00192 0.00192 0.00192 0.00192 ...
## $ length.of.transect: int 29 29 29 29 29 29 29 29 29 29 ...
## $ crop.heigth : int 31 31 31 31 31 31 31 31 31 31 ...
## $ D_trees : int 1 1 1 1 1 1 1 1 1 1 ...
## $ D_vegetation : int 3 3 3 3 3 3 3 3 3 3 ...
## $ vegetation.type : chr "woodlot" "woodlot" "woodlot" "woodlot" ...
## $ matrix_habitat : chr "crop field" "crop field" "crop field" "crop field" ...
## $ Water : int 4 4 4 4 4 4 4 4 4 4 ...
## $ Trees.10m : int 1 1 1 1 1 1 1 1 1 1 ...
# per-column summary of the survey data (quantiles for numeric columns,
# length/class/mode for character columns)
summary(z)
## farm.code before.after site session
## Length:817 Length:817 Length:817 Min. :4
## Class :character Class :character Class :character 1st Qu.:4
## Mode :character Mode :character Mode :character Median :4
## Mean :4
## 3rd Qu.:4
## Max. :4
## crop.type species guild number
## Length:817 Length:817 Length:817 Min. : 0.000
## Class :character Class :character Class :character 1st Qu.: 1.000
## Mode :character Mode :character Mode :character Median : 1.000
## Mean : 1.819
## 3rd Qu.: 2.000
## Max. :41.000
## area N_trees tree.density length.of.transect
## Min. : 522 Min. : 0.000 Min. :0.0000000 Min. : 29.00
## 1st Qu.: 1475 1st Qu.: 0.000 1st Qu.:0.0000000 1st Qu.: 47.00
## Median : 2320 Median : 1.000 Median :0.0002000 Median : 57.00
## Mean : 3261 Mean : 1.487 Mean :0.0006220 Mean : 74.78
## 3rd Qu.: 3380 3rd Qu.: 1.000 3rd Qu.:0.0006667 3rd Qu.: 67.00
## Max. :15010 Max. :14.000 Max. :0.0034965 Max. :234.00
## crop.heigth D_trees D_vegetation vegetation.type
## Min. : 17.0 Min. : 1.00 Min. :1.000 Length:817
## 1st Qu.: 33.0 1st Qu.: 2.00 1st Qu.:1.000 Class :character
## Median :180.0 Median : 6.00 Median :2.000 Mode :character
## Mean :139.6 Mean :14.87 Mean :2.471
## 3rd Qu.:205.0 3rd Qu.:23.00 3rd Qu.:4.000
## Max. :242.0 Max. :82.00 Max. :4.000
## matrix_habitat Water Trees.10m
## Length:817 Min. :0.000 Min. : 0.000
## Class :character 1st Qu.:4.000 1st Qu.: 0.000
## Mode :character Median :4.000 Median : 1.000
## Mean :3.447 Mean : 2.285
## 3rd Qu.:4.000 3rd Qu.: 3.000
## Max. :4.000 Max. :15.000
mean(z$number) # mean number of birds per row of the survey
## [1] 1.818849
sum(z$number) # total number of birds counted (not the number of rows, which is 817)
## [1] 1486
sd(z$number) # standard deviation (not the variance) of the number of birds
## [1] 2.343951
######### subsetting the data by crop type to summarise bird counts per crop
# subsetting bird count in legume fields
# keep only the `number` column for rows whose crop.type is "Legume"
aa <- subset(z, crop.type == "Legume", select = "number")
summary(aa)
## number
## Min. : 0.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 1.568
## 3rd Qu.: 2.000
## Max. :21.000
sum(aa$number) # total birds counted in legume fields (not the number of rows)
## [1] 425
mean(aa$number) # mean birds per row in legume fields
## [1] 1.568266
sd(aa$number) # standard deviation of birds per row in legume fields
## [1] 1.907575
# subsetting bird count in maize fields
# keep only the `number` column for rows whose crop.type is "Maize"
bb <- subset(z, crop.type == "Maize", select = "number")
mean(bb$number) # mean birds per row in maize fields
## [1] 2.026042
sd(bb$number) # standard deviation of birds per row in maize fields
## [1] 3.357316
sum(bb$number) # total birds counted in maize fields
## [1] 389
# subsetting bird count in mixed crop fields
# keep only the `number` column for rows whose crop.type is "Mixed"
cc <- subset(z, crop.type == "Mixed", select = "number")
# total birds counted in mixed fields (computed from cc, not from the full
# data set z, whose total of 1486 covers all crop types)
sum(cc$number)
## [1] 672
mean(cc$number) # mean birds per row in mixed fields
## [1] 1.898305
sd(cc$number) # standard deviation of birds per row in mixed fields
## [1] 1.933995
Simulating data using values from original data
# Scenario 1: simulate bird counts with parameters taken from the observed data.
# Fixed seed so the random draws, and therefore the ANOVA and plot that follow,
# are the same on every run.
set.seed(2024)
nGroup <- 3 # number of treatment groups
nName <- c("Legume", "Maize", "Mix") # names of groups
# Simulated observations per group. These values are the total birds counted
# per crop type (sum of `number`), not the number of survey rows.
# NOTE(review): sum / mean gives roughly 271, 192 and 354 rows per crop type --
# confirm which sample size is intended.
nSize <- c(425, 389, 672)
# NOTE(review): the observed legume mean is 1.57, not 1.8 -- confirm.
nMean <- c(1.8, 2.0, 1.9) # mean of each group
nSD <- c(1.9, 3.4, 1.9) # standard deviation of each group
ID <- seq_len(sum(nSize)) # row identifier; stays empty if the sizes sum to 0
cGroup <- rep(nName, nSize) # group label for every simulated row
# rnorm() is vectorised over mean and sd, so one call draws every group in
# order with its own parameters.
# NOTE(review): normal draws can be negative and non-integer, unlike counts.
countc <- rnorm(
  n = length(cGroup),
  mean = rep(nMean, nSize),
  sd = rep(nSD, nSize)
)
Croptype <- data.frame(ID, cGroup, countc)
# performing ANOVA
# One-way ANOVA of the simulated count on the crop group.
# Lines beginning with ## are the recorded console output.
# NOTE(review): the groups are simulated with unequal SDs (nSD = 1.9, 3.4, 1.9)
# while aov() assumes equal variances -- consider
# oneway.test(countc ~ cGroup, data = Croptype, var.equal = FALSE).
ANOmodel <- aov(countc~cGroup,data=Croptype)
print(ANOmodel)
## Call:
## aov(formula = countc ~ cGroup, data = Croptype)
##
## Terms:
## cGroup Residuals
## Sum of Squares 29.755 8631.502
## Deg. of Freedom 2 1483
##
## Residual standard error: 2.412529
## Estimated effects may be unbalanced
# ANOVA table with the F statistic and p-value for the group effect
print(summary(ANOmodel))
## Df Sum Sq Mean Sq F value Pr(>F)
## cGroup 2 30 14.88 2.556 0.0779 .
## Residuals 1483 8632 5.82
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plotting graph
# box plot of the simulated counts, one box per crop group, filled by group
ANOPlot <- ggplot(data=Croptype,aes(x=cGroup,y=countc,fill=cGroup)) +
geom_boxplot()
print(ANOPlot)
There was no significant difference among the mean bird counts of the three field types (F = 2.56, p = 0.078), even though the total number of birds counted differed among field types (425, 389 and 672).
I am now going to increase the sample sizes of the legume and maize fields (leaving the mixed fields at 672) to see whether that is enough to produce a significant difference.
# Scenario 2: same means and SDs, larger legume and maize sample sizes.
# Fixed seed so the random draws, and therefore the ANOVA and plot that follow,
# are the same on every run.
set.seed(2024)
nGroup <- 3 # number of treatment groups
nName <- c("Legume", "Maize", "Mix") # names of groups
nSize <- c(620, 589, 672) # number of simulated observations in each group
nMean <- c(1.8, 2.0, 1.9) # mean of each group
nSD <- c(1.9, 3.4, 1.9) # standard deviation of each group
ID <- seq_len(sum(nSize)) # row identifier; stays empty if the sizes sum to 0
cGroup <- rep(nName, nSize) # group label for every simulated row
# rnorm() is vectorised over mean and sd, so one call draws every group in
# order with its own parameters.
# NOTE(review): normal draws can be negative and non-integer, unlike counts.
countc <- rnorm(
  n = length(cGroup),
  mean = rep(nMean, nSize),
  sd = rep(nSD, nSize)
)
Croptype <- data.frame(ID, cGroup, countc)
# performing ANOVA
# One-way ANOVA of the simulated count on the crop group.
# Lines beginning with ## are the recorded console output.
# NOTE(review): the groups are simulated with unequal SDs (nSD = 1.9, 3.4, 1.9)
# while aov() assumes equal variances -- consider
# oneway.test(countc ~ cGroup, data = Croptype, var.equal = FALSE).
ANOmodel <- aov(countc~cGroup,data=Croptype)
print(ANOmodel)
## Call:
## aov(formula = countc ~ cGroup, data = Croptype)
##
## Terms:
## cGroup Residuals
## Sum of Squares 67.867 11756.045
## Deg. of Freedom 2 1878
##
## Residual standard error: 2.501974
## Estimated effects may be unbalanced
# ANOVA table with the F statistic and p-value for the group effect
print(summary(ANOmodel))
## Df Sum Sq Mean Sq F value Pr(>F)
## cGroup 2 68 33.93 5.421 0.00449 **
## Residuals 1878 11756 6.26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plotting graph
# box plot of the simulated counts, one box per crop group, filled by group
ANOPlot <- ggplot(data=Croptype,aes(x=cGroup,y=countc,fill=cGroup)) +
geom_boxplot()
print(ANOPlot)
Increasing the sample sizes of the two smaller groups (legume from 425 to 620, maize from 389 to 589), so that they were about 50 and 80 below the largest group (672), was enough to produce a significant difference among the three groups (p = 0.0045).
Next, I lowered the mean of the legume fields and kept the original sample sizes (425, 389 and 672).
# Scenario 3: original sample sizes, legume mean lowered to 1.0.
# Fixed seed so the random draws, and therefore the ANOVA and plot that follow,
# are the same on every run.
set.seed(2024)
nName <- c("Legume", "Maize", "Mix") # names of groups
nSize <- c(425, 389, 672) # number of simulated observations in each group
nMean <- c(1.0, 2.0, 1.9) # mean of each group
nSD <- c(1.9, 3.4, 1.9) # standard deviation of each group
ID <- seq_len(sum(nSize)) # row identifier; stays empty if the sizes sum to 0
cGroup <- rep(nName, nSize) # group label for every simulated row
# rnorm() is vectorised over mean and sd, so one call draws every group in
# order with its own parameters.
# NOTE(review): normal draws can be negative and non-integer, unlike counts.
countc <- rnorm(
  n = length(cGroup),
  mean = rep(nMean, nSize),
  sd = rep(nSD, nSize)
)
Croptype <- data.frame(ID, cGroup, countc)
# performing ANOVA
# One-way ANOVA of the simulated count on the crop group.
# Lines beginning with ## are the recorded console output.
# NOTE(review): the groups are simulated with unequal SDs (nSD = 1.9, 3.4, 1.9)
# while aov() assumes equal variances -- consider
# oneway.test(countc ~ cGroup, data = Croptype, var.equal = FALSE).
ANOmodel <- aov(countc~cGroup,data=Croptype)
print(ANOmodel)
## Call:
## aov(formula = countc ~ cGroup, data = Croptype)
##
## Terms:
## cGroup Residuals
## Sum of Squares 350.250 8774.472
## Deg. of Freedom 2 1483
##
## Residual standard error: 2.432428
## Estimated effects may be unbalanced
# ANOVA table with the F statistic and p-value for the group effect
print(summary(ANOmodel))
## Df Sum Sq Mean Sq F value Pr(>F)
## cGroup 2 350 175.13 29.6 2.49e-13 ***
## Residuals 1483 8774 5.92
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plotting graph
# box plot of the simulated counts, one box per crop group, filled by group
ANOPlot <- ggplot(data=Croptype,aes(x=cGroup,y=countc,fill=cGroup)) +
geom_boxplot()
print(ANOPlot)
Changing the mean number of birds recorded in legume fields from 1.8 to 1.0 yielded a highly significant difference among the groups (p < 0.001). Thus increasing either the sample size or the difference between group means can turn a non-significant ANOVA result into a significant one.