Homework

The hypothesis tested here is that there is a difference in bird counts across three different field (crop) types.

# loading packages
library(ggplot2)
library(MASS)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

################### true data #############################
# Read the comma-separated field data (the first row holds the column names),
# then show the structure of the data frame and a summary of every column.
z <- read.table(file = "fdata.csv", header = TRUE, sep = ",")
str(z)

## 'data.frame':    817 obs. of  19 variables:
##  $ farm.code         : chr  "GBN 8" "GBN 8" "GBN 8" "GBN 8" ...
##  $ before.after      : chr  "after" "after" "after" "after" ...
##  $ site              : chr  "rizek" "rizek" "rizek" "rizek" ...
##  $ session           : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ crop.type         : chr  "Legume" "Legume" "Legume" "Legume" ...
##  $ species           : chr  "Whinchat" "Plain backed pipit" "Purple glossy starling" "Laughing dove" ...
##  $ guild             : chr  "insectivore" "insectivore" "insectivore" "granivore" ...
##  $ number            : int  2 2 1 1 2 1 6 2 1 1 ...
##  $ area              : int  522 522 522 522 522 522 522 522 522 522 ...
##  $ N_trees           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ tree.density      : num  0.00192 0.00192 0.00192 0.00192 0.00192 ...
##  $ length.of.transect: int  29 29 29 29 29 29 29 29 29 29 ...
##  $ crop.heigth       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ D_trees           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ D_vegetation      : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ vegetation.type   : chr  "woodlot" "woodlot" "woodlot" "woodlot" ...
##  $ matrix_habitat    : chr  "crop field" "crop field" "crop field" "crop field" ...
##  $ Water             : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ Trees.10m         : int  1 1 1 1 1 1 1 1 1 1 ...

summary(z)

##   farm.code         before.after           site              session 
##  Length:817         Length:817         Length:817         Min.   :4  
##  Class :character   Class :character   Class :character   1st Qu.:4  
##  Mode  :character   Mode  :character   Mode  :character   Median :4  
##                                                           Mean   :4  
##                                                           3rd Qu.:4  
##                                                           Max.   :4  
##   crop.type           species             guild               number      
##  Length:817         Length:817         Length:817         Min.   : 0.000  
##  Class :character   Class :character   Class :character   1st Qu.: 1.000  
##  Mode  :character   Mode  :character   Mode  :character   Median : 1.000  
##                                                           Mean   : 1.819  
##                                                           3rd Qu.: 2.000  
##                                                           Max.   :41.000  
##       area          N_trees        tree.density       length.of.transect
##  Min.   :  522   Min.   : 0.000   Min.   :0.0000000   Min.   : 29.00    
##  1st Qu.: 1475   1st Qu.: 0.000   1st Qu.:0.0000000   1st Qu.: 47.00    
##  Median : 2320   Median : 1.000   Median :0.0002000   Median : 57.00    
##  Mean   : 3261   Mean   : 1.487   Mean   :0.0006220   Mean   : 74.78    
##  3rd Qu.: 3380   3rd Qu.: 1.000   3rd Qu.:0.0006667   3rd Qu.: 67.00    
##  Max.   :15010   Max.   :14.000   Max.   :0.0034965   Max.   :234.00    
##   crop.heigth       D_trees       D_vegetation   vegetation.type   
##  Min.   : 17.0   Min.   : 1.00   Min.   :1.000   Length:817        
##  1st Qu.: 33.0   1st Qu.: 2.00   1st Qu.:1.000   Class :character  
##  Median :180.0   Median : 6.00   Median :2.000   Mode  :character  
##  Mean   :139.6   Mean   :14.87   Mean   :2.471                     
##  3rd Qu.:205.0   3rd Qu.:23.00   3rd Qu.:4.000                     
##  Max.   :242.0   Max.   :82.00   Max.   :4.000                     
##  matrix_habitat         Water         Trees.10m     
##  Length:817         Min.   :0.000   Min.   : 0.000  
##  Class :character   1st Qu.:4.000   1st Qu.: 0.000  
##  Mode  :character   Median :4.000   Median : 1.000  
##                     Mean   :3.447   Mean   : 2.285  
##                     3rd Qu.:4.000   3rd Qu.: 3.000  
##                     Max.   :4.000   Max.   :15.000

# Descriptive statistics for the bird counts over the whole data set.
# The sample size is the number of rows (records), not the sum of the counts.
nrow(z) # get the sample size (number of records)

## [1] 817

mean(z$number) # generate the mean number of birds recorded

## [1] 1.818849

sum(z$number) # get the total number of birds counted (not the sample size)

## [1] 1486

sd(z$number) # get the standard deviation of the number of birds

## [1] 2.343951

######### subsetting data to get mean values of elements in the character string
# subsetting bird count in legume fields
# (keeps only the `number` column for rows whose crop.type is "Legume")
aa <- subset(z, crop.type == "Legume", select = "number")
summary(aa)

##      number      
##  Min.   : 0.000  
##  1st Qu.: 1.000  
##  Median : 1.000  
##  Mean   : 1.568  
##  3rd Qu.: 2.000  
##  Max.   :21.000

nrow(aa) # get the sample size (number of records in legume fields)

## [1] 271

sum(aa$number) # get the total number of birds counted (not the sample size)

## [1] 425

mean(aa$number)

## [1] 1.568266

sd(aa$number)

## [1] 1.907575

# Bird counts recorded in maize fields: keep only the `number` column of the
# rows whose crop.type is "Maize", then report mean, standard deviation and
# total number of birds counted.
bb <- subset(z, subset = crop.type == "Maize", select = "number")
mean(bb[["number"]])

## [1] 2.026042

sd(bb[["number"]])

## [1] 3.357316

sum(bb[["number"]])

## [1] 389

# subsetting bird count in mixed crop fields
# (keeps only the `number` column for rows whose crop.type is "Mixed")
cc <- subset(z, crop.type == "Mixed", select = "number")
nrow(cc) # get the sample size (number of records in mixed crop fields)

## [1] 354

mean(cc$number)

## [1] 1.898305

sd(cc$number)

## [1] 1.933995

sum(cc$number) # total number of birds counted in mixed crop fields

## [1] 672

Simulating data using values from the original data

# Simulation settings; each vector has one entry per field type, in the order
# given by nName.
# NOTE(review): nSize holds the summed bird counts printed above (425, 389,
# 672), not the number of rows per crop type, and the legume mean is set to 1.8
# although the observed legume mean printed above is 1.568 - confirm both are
# intended.
nGroup <- 3                           # number of treatment groups
nName <- c("Legume", "Maize", "Mix")  # names of groups
nSize <- c(425, 389, 672)             # number of values simulated per group
nMean <- c(1.8, 2.0, 1.9)             # mean of each group
nSD <- c(1.9, 3.4, 1.9)               # standard deviation of each group

# One running ID per simulated value
ID <- seq_len(sum(nSize))

# Draw normally distributed values group by group (Legume, then Maize, then
# Mix) and join them into one vector. No seed is set here, so every run gives
# different values; the printed results below come from one particular draw.
# NOTE(review): rnorm() can return negative and non-integer values, unlike
# real bird counts.
countc <- unlist(Map(rnorm, n = nSize, mean = nMean, sd = nSD))
cGroup <- rep(nName, times = nSize)   # group label repeated once per value
Croptype <- data.frame(ID = ID, cGroup = cGroup, countc = countc)

# One-way ANOVA of the simulated counts against field type
ANOmodel <- aov(formula = countc ~ cGroup, data = Croptype)
print(ANOmodel)

## Call:
##    aov(formula = countc ~ cGroup, data = Croptype)
## 
## Terms:
##                   cGroup Residuals
## Sum of Squares    29.755  8631.502
## Deg. of Freedom        2      1483
## 
## Residual standard error: 2.412529
## Estimated effects may be unbalanced

print(summary(ANOmodel))

##               Df Sum Sq Mean Sq F value Pr(>F)  
## cGroup         2     30   14.88   2.556 0.0779 .
## Residuals   1483   8632    5.82                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Box plot of the simulated counts, one filled box per field type
ANOPlot <- ggplot(
  data = Croptype,
  mapping = aes(x = cGroup, y = countc, fill = cGroup)
) +
  geom_boxplot()
print(ANOPlot)

There was no significant difference between the mean bird counts in the three field types (F = 2.556, p = 0.078), even though the total abundance differed considerably across field types (425, 389 and 672 birds).

I am now going to adjust the sample size of all three field types to see if it will be enough to cause a significant difference.

# Simulation settings; each vector has one entry per field type, in the order
# given by nName. Compared with the first simulation only nSize changes: the
# Legume and Maize sizes are raised from 425 and 389 to 620 and 589.
# NOTE(review): the original sizes were summed bird counts, not the number of
# rows per crop type, and the legume mean of 1.8 differs from the observed
# legume mean of 1.568 printed earlier - confirm both are intended.
nGroup <- 3                           # number of treatment groups
nName <- c("Legume", "Maize", "Mix")  # names of groups
nSize <- c(620, 589, 672)             # number of values simulated per group
nMean <- c(1.8, 2.0, 1.9)             # mean of each group
nSD <- c(1.9, 3.4, 1.9)               # standard deviation of each group

# One running ID per simulated value
ID <- seq_len(sum(nSize))

# Draw normally distributed values group by group (Legume, then Maize, then
# Mix) and join them into one vector. No seed is set here, so every run gives
# different values; the printed results below come from one particular draw.
# NOTE(review): rnorm() can return negative and non-integer values, unlike
# real bird counts.
countc <- unlist(Map(rnorm, n = nSize, mean = nMean, sd = nSD))
cGroup <- rep(nName, times = nSize)   # group label repeated once per value
Croptype <- data.frame(ID = ID, cGroup = cGroup, countc = countc)

# One-way ANOVA of the simulated counts against field type
ANOmodel <- aov(formula = countc ~ cGroup, data = Croptype)
print(ANOmodel)

## Call:
##    aov(formula = countc ~ cGroup, data = Croptype)
## 
## Terms:
##                    cGroup Residuals
## Sum of Squares     67.867 11756.045
## Deg. of Freedom         2      1878
## 
## Residual standard error: 2.501974
## Estimated effects may be unbalanced

print(summary(ANOmodel))

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## cGroup         2     68   33.93   5.421 0.00449 **
## Residuals   1878  11756    6.26                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Box plot of the simulated counts, one filled box per field type
ANOPlot <- ggplot(
  data = Croptype,
  mapping = aes(x = cGroup, y = countc, fill = cGroup)
) +
  geom_boxplot()
print(ANOPlot)

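Increasing the sample sizes of the two smaller groups (legume and maize) towards the size of the largest group, so that they were only about 50 and 80 birds below it respectively, was enough to produce a significant difference between the three groups (F = 5.421, p = 0.0045).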

Next, I adjusted the mean of the legume field and kept the original sample sizes unchanged.

# Simulation settings; each vector has one entry per field type, in the order
# given by nName. Compared with the first simulation only the Legume mean
# changes (1.8 lowered to 1.0); the group sizes are the original ones.
# NOTE(review): nSize holds the summed bird counts printed earlier (425, 389,
# 672), not the number of rows per crop type - confirm this is intended.
nName <- c("Legume", "Maize", "Mix")  # names of groups
nSize <- c(425, 389, 672)             # number of values simulated per group
nMean <- c(1.0, 2.0, 1.9)             # mean of each group
nSD <- c(1.9, 3.4, 1.9)               # standard deviation of each group

# One running ID per simulated value
ID <- seq_len(sum(nSize))

# Draw normally distributed values group by group (Legume, then Maize, then
# Mix) and join them into one vector. No seed is set here, so every run gives
# different values; the printed results below come from one particular draw.
# NOTE(review): rnorm() can return negative and non-integer values, unlike
# real bird counts.
countc <- unlist(Map(rnorm, n = nSize, mean = nMean, sd = nSD))
cGroup <- rep(nName, times = nSize)   # group label repeated once per value
Croptype <- data.frame(ID = ID, cGroup = cGroup, countc = countc)

# One-way ANOVA of the simulated counts against field type
ANOmodel <- aov(formula = countc ~ cGroup, data = Croptype)
print(ANOmodel)

## Call:
##    aov(formula = countc ~ cGroup, data = Croptype)
## 
## Terms:
##                   cGroup Residuals
## Sum of Squares   350.250  8774.472
## Deg. of Freedom        2      1483
## 
## Residual standard error: 2.432428
## Estimated effects may be unbalanced

print(summary(ANOmodel))

##               Df Sum Sq Mean Sq F value   Pr(>F)    
## cGroup         2    350  175.13    29.6 2.49e-13 ***
## Residuals   1483   8774    5.92                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Box plot of the simulated counts, one filled box per field type
ANOPlot <- ggplot(
  data = Croptype,
  mapping = aes(x = cGroup, y = countc, fill = cGroup)
) +
  geom_boxplot()
print(ANOPlot)

Changing the mean of birds recorded in legume field from 1.8 to 1.0 yielded a significant difference in the results. Thus manipulating either the sample size or mean yields a difference in results.

Homework_07

Carlos Amissah

2024-03-06