# Collect every NEON bird count CSV found in the first-level subfolders of
# main_folder into a single flat folder (Nfolder).

# Set the path to the main folder containing subfolders
main_folder <- "C:/Users/carlos/Desktop/OriginalData"
# Get the list of subfolders within the main folder (first level only)
subfolders <- list.dirs(main_folder, full.names = TRUE, recursive = FALSE)
# Define the file pattern
file_pattern <- "NEON.D01.BART.DP1.10003.001.brd_countdata.*\\.csv"
# Create a folder to hold the files of interest
Nfolder <- "C:/Users/carlos/Desktop/NewFolder"
# dir.exists() rather than file.exists(): a regular file with the same name
# must not be mistaken for the destination folder.
if (!dir.exists(Nfolder)) {
  dir.create(Nfolder)
}
# Loop through each subfolder
for (folder in subfolders) {
  # Get the list of files in the subfolder
  files <- list.files(folder, pattern = file_pattern, full.names = TRUE)
  # If there are matching files, copy them to the new folder
  if (length(files) > 0) {
    targets <- file.path(Nfolder, basename(files))
    # file.copy() never overwrites by default and returns FALSE for a target
    # that already exists; that case is expected on a re-run, so only copies
    # that failed for some other reason are reported.
    already_there <- file.exists(targets)
    copied <- file.copy(files, targets)
    failed <- !copied & !already_there
    if (any(failed)) {
      warning(
        "Could not copy: ", paste(files[failed], collapse = ", "),
        call. = FALSE
      )
    }
  } else {
    cat("No files matching", file_pattern, "found in", folder, "\n")
  }
}
## No files matching NEON.D01.BART.DP1.10003.001.brd_countdata.*\.csv found in C:/Users/carlos/Desktop/OriginalData/.Rproj.user
## No files matching NEON.D01.BART.DP1.10003.001.brd_countdata.*\.csv found in C:/Users/carlos/Desktop/OriginalData/gitsandbox
#2 cleaning data
library(stringr)# required to use str_extract()
# Function to clean the data
#
# Removes every row whose `scientificName` is NA.
#
# data: a data frame with a `scientificName` column.
# Returns: a data frame with the same columns as `data`, containing only the
#   rows that have a non-missing `scientificName`.
clean_data <- function(data) {
  keep <- complete.cases(data$scientificName)
  # drop = FALSE keeps the result a data frame even when `data` has a single
  # column; without it the subset collapses to a bare vector.
  data[keep, , drop = FALSE]
}
# Function to extract the year from the file name
#
# NEON file names carry the sampling month as a `YYYY-MM` token, e.g.
# "...brd_countdata.2015-06.basic.20231226T232626Z.csv". Matching any four
# digits would pick up "1000" from the product code "10003", so the pattern
# requires four digits that are not preceded by another digit and that are
# immediately followed by "-MM".
#
# file_name: character vector of file names.
# Returns: character vector the same length as `file_name` holding the
#   four-digit year, or NA where no `YYYY-MM` token is present.
extract_year <- function(file_name) {
  file_name <- as.character(file_name)
  year_pattern <- "(?<![0-9])[0-9]{4}(?=-[0-9]{2})"
  hits <- regexpr(year_pattern, file_name, perl = TRUE)
  found <- !is.na(hits) & hits != -1L
  year <- rep(NA_character_, length(file_name))
  # regmatches() returns only the matched substrings, in input order
  year[found] <- regmatches(file_name, hits)
  year
}
# Function to calculate abundance
#
# Abundance is taken to be the number of rows (records) in `data`.
#
# data: a data frame of observations.
# Returns: the row count of `data` as an integer.
calculate_abundance <- function(data) {
  # First element of dim() is the number of rows
  dim(data)[1L]
}
# Function to calculate species richness
#
# Richness is the number of distinct values in the `scientificName` column.
#
# data: a data frame with a `scientificName` column.
# Returns: the count of distinct `scientificName` values as an integer.
calculate_species_richness <- function(data) {
  species <- data$scientificName
  # Count each value the first time it appears
  first_seen <- !duplicated(species)
  sum(first_seen)
}
# Print abundance and species richness for every bird count file.

# Set the directory where the files are located. This is the folder the
# files were copied into (Nfolder), spelled with the same capitalisation so
# the path also resolves on case-sensitive file systems.
n_dir <- "C:/Users/carlos/Desktop/NewFolder"
# Create a list with the files in the working directory. The dots in the
# file name are escaped so they match literal dots only.
files <- list.files(
  n_dir,
  pattern = "NEON\\.D01\\.BART\\.DP1\\.10003\\.001\\.brd_countdata\\..*\\.csv$",
  full.names = TRUE
)
if (length(files) == 0) {
  warning("No bird count files found in ", n_dir, call. = FALSE)
}
# Loop through each file
for (file in files) {
  # Extract the year from the file name
  year <- extract_year(basename(file))
  # Read the data from the file
  data <- read.csv(file)
  # Clean the data
  cleaned_data <- clean_data(data)
  # Calculate abundance
  abundance <- calculate_abundance(cleaned_data)
  # Calculate species richness
  richness <- calculate_species_richness(cleaned_data)
  # Print results
  cat("File:", basename(file), "\n")
  cat("Year:", year, "\n")
  cat("Abundance:", abundance, "\n")
  cat("Species Richness:", richness, "\n")
  cat("\n")
}
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20231226T232626Z.csv
## Year: 1000
## Abundance: 454
## Species Richness: 40
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20231227T013428Z.csv
## Year: 1000
## Abundance: 883
## Species Richness: 39
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20231227T094709Z.csv
## Year: 1000
## Abundance: 685
## Species Richness: 35
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20231228T172744Z.csv
## Year: 1000
## Abundance: 772
## Species Richness: 37
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20231227T184129Z.csv
## Year: 1000
## Abundance: 628
## Species Richness: 44
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20231227T224944Z.csv
## Year: 1000
## Abundance: 626
## Species Richness: 46
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2020-07.basic.20231227T225020Z.csv
## Year: 1000
## Abundance: 89
## Species Richness: 18
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20231228T010546Z.csv
## Year: 1000
## Abundance: 1015
## Species Richness: 50
##
## File: NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20231229T053256Z.csv
## Year: 1000
## Abundance: 699
## Species Richness: 39
##### Create an initial empty data frame to hold the above summary statistics. It should have 4 columns: one for the file name, one for abundance, one for species richness, and one for year.
# Build one summary row per file and combine them into `results`.

# Empty template that fixes the column names and types of the result
results <- data.frame(File = character(),
                      Date = character(),
                      Abundance = numeric(),
                      Richness = numeric(),
                      stringsAsFactors = FALSE) # Ensuring strings are treated as characters
# Collect the per-file rows in a preallocated list and bind them once at the
# end, instead of growing the data frame with rbind() on every iteration.
result_rows <- vector("list", length(files))
# Loop through each file in the folder
for (i in seq_along(files)) {
  file <- files[[i]]
  # Extract the year from the file name
  year <- extract_year(basename(file))
  # Read the data from the file
  data <- read.csv(file)
  # Clean the data
  cleaned_data <- clean_data(data)
  # Calculate abundance
  abundance <- calculate_abundance(cleaned_data)
  # Calculate species richness
  richness <- calculate_species_richness(cleaned_data)
  # Store the results for this file
  result_rows[[i]] <- data.frame(File = basename(file),
                                 Date = year,
                                 Abundance = abundance,
                                 Richness = richness,
                                 stringsAsFactors = FALSE) # Ensuring strings are treated as characters
}
# The empty template goes first so the column types follow it and so the
# result is still a 4-column data frame when `files` is empty.
results <- do.call(rbind, c(list(results), result_rows))
# Print the results
print(results)
## File
## 1 NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20231226T232626Z.csv
## 2 NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20231227T013428Z.csv
## 3 NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20231227T094709Z.csv
## 4 NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20231228T172744Z.csv
## 5 NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20231227T184129Z.csv
## 6 NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20231227T224944Z.csv
## 7 NEON.D01.BART.DP1.10003.001.brd_countdata.2020-07.basic.20231227T225020Z.csv
## 8 NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20231228T010546Z.csv
## 9 NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20231229T053256Z.csv
## Date Abundance Richness
## 1 1000 454 40
## 2 1000 883 39
## 3 1000 685 35
## 4 1000 772 37
## 5 1000 628 44
## 6 1000 626 46
## 7 1000 89 18
## 8 1000 1015 50
## 9 1000 699 39