Implementing Biomass Predictions

This document provides the code used to implement my linear regressions equations on my dataset and compute the missing biomass values.
Author

Alexis Means

Published

October 1, 2025

Data Wrangling

Code
library(tidyverse)
library(MuMIn)
library(terra)
library(broom)
library(readr)
library(readxl)
library(sf)

setwd("C:/Users/Alexis Means/Documents/Project/Nutrition Sampling/R code/biomass")

# Load the global models
spp_models <- readRDS('regression_equations/24and25-Biomass-Regression-Species-Top-Model-List.rds')
genus_models <- readRDS('regression_equations/24and25-Biomass-Regression-Genus-Top-Model-List.rds')
family_models <- readRDS('regression_equations/24and25-Biomass-Regression-Family-Top-Model-List.rds')
functionalgroup_models <- readRDS('regression_equations/24and25-Biomass-Regression-Functional-Group-Top-Model-List.rds')


#2024 and 2025 observations 
db <- "C:/Users/Alexis Means/Documents/Project/Nutrition Sampling/VegDatabases/Working.composition.data.xlsx"
biomass <- read_excel(db, "Biomass")
comp <- read_excel(db, "Composition")
transect <- read_excel(db, "Transect")
plant <- read_excel(db, "PlantList")

#Organizing and joining databases####
comp <- comp %>% 
  mutate( Date = ymd(Date),          
          julian = yday(Date)         
  ) %>% 
  select(julian, PlotID, Quadrat, Spp, Percent, Pheno, Part)


biomass <- biomass %>% 
  rename(DryWeight = `DryWeight(g)`) %>% 
  select(PlotID, Quadrat, Spp, Pheno, Part, DryWeight)



transect_sf <- st_as_sf(transect,
                        coords = c("BeginLong", "BeginLat"), 
                        crs = 4326)   # WGS84 (lat/long)
transect_utm <- st_transform(transect_sf, crs = 32610)
coords <- st_coordinates(transect_utm)

transect <- transect %>%
  mutate(BeginUTM_Easting = coords[,1],
         BeginUTM_Northing = coords[,2]) %>% 
  select(PVT, PlotID, BeginUTM_Northing, BeginUTM_Easting)



plant <- plant %>% 
  select(Spp, Family, Genus, Species, FG_New) %>% 
  rename(FunctionalGroup = FG_New)



#create object for inorganic matter 
abiotic <- c("LITTER","EARTH", "LICHEN", "WATER", "ROCKS")


#combining and creating an overall %cover for inorganic matter in each quadrat
comp <- comp %>% 
  group_by(PlotID, Quadrat) %>% 
  mutate(Abiotic.cover = sum(Percent[(Spp %in% abiotic)])) %>% 
  ungroup()


#Left join biomass and comp by quadrat and plot_ID
comp <- comp %>% 
  left_join(y = biomass,
            by = c("PlotID", "Quadrat", "Spp", "Pheno", "Part")) 


#filtering out all quadrats where no biomass is recorded for the quadrat
#filter out all of the abiotic observations
comp <- comp %>% 
  filter(!(Spp %in% abiotic)) 


## left join columns from plant list, comp, biomass and transect to keep the necessary columns 
comp <- comp %>% 
  left_join(y = plant,
            by = "Spp") %>%  
  left_join(y=transect,
            by = "PlotID", 
            relationship = "many-to-one") #since there is multiple plotID duplicates this is necessary to join the transect database


comp <- comp %>% 
  select(julian, PVT, PlotID, Quadrat, Spp, Pheno, Part, Abiotic.cover,
         DryWeight, Percent, Family, Genus, Species, FunctionalGroup, BeginUTM_Easting,
         BeginUTM_Northing)


# Ensure there's an id column
if (!"ID" %in% colnames(comp)) {
  comp$ID <- 1:nrow(comp)
}

# Create empty list to load the covariates into
covariates <- vector(mode = 'list')

# List all the covariates that you want to use
covariates$asp <- rast("C:/Users/Alexis Means/OneDrive/OneDrive - University of Idaho/DocuMents/Project/Nutrition Sampling/Rasters/LF_Asp/LC20_Asp_220.tif")
covariates$elev <- rast("C:/Users/Alexis Means/OneDrive/OneDrive - University of Idaho/DocuMents/Project/Nutrition Sampling/Rasters/LF_Elev/LC20_Elev_220.tif")

# Make a shape object out of the coordinates that exist in comp
coords <- vect(comp,
               geom = c("BeginUTM_Easting","BeginUTM_Northing"),
               crs = "EPSG:32610")

for (i in 1:length(covariates)){
  proj.coords = project(x = coords,
                        y = covariates[[i]])
  out = extract(x = covariates[[i]],
                y = proj.coords)
  comp[,names(covariates[i])] = out [,2]
}

#make sure everything is being read the way we want it to 
comp$elev = as.numeric(comp$elev)
comp$Family = as.factor(comp$Family)
comp$Genus= as.factor(comp$Genus)
comp$asp = as.numeric(comp$asp)
comp$Spp= as.factor(comp$Spp)
comp$DryWeight= as.numeric(comp$DryWeight)


#restructure df 
comp <- comp %>% 
  select(ID, PlotID, Spp, Family, Genus, DryWeight, Percent, julian, PVT, Abiotic.cover, elev, asp, FunctionalGroup)

model.y <- comp %>% 
  select(ID, PlotID, Spp, Family, Genus, DryWeight, PVT, FunctionalGroup)
model.x <- comp %>% 
  select(Percent, julian, Abiotic.cover, elev, asp) 

Transform Covariates

Code
# Define covariate transformations
transformations <- function(x) {
  data.frame(
    identity = x,
    sin = sin(x),
    cos = cos(x),
    quadratic = x^2,
    cubic = x^3,
    log = ifelse(x > 0, log(x), NA),
    sqrt = ifelse(x > 0, sqrt(x), NA)
  )
}


#calculate transformed covariates
transform.covars = lapply(X = model.x,
                          FUN = transformations)
transform.covars = do.call(cbind, transform.covars)


#rename columns
colnames(transform.covars) = unlist(lapply(X = c("Percent", "julian", "Abiotic.cover", "elev", "asp"),
                                           FUN = function(var) {
                                             paste0(var, '_', c('identity', 'sin', 'cos', 'quadratic', 'cubic', 'log', 'sqrt'))
                                           }))



#scale and center predictor variables
model.x = transform.covars %>% 
  mutate(across(everything(), ~ (.-mean(.))/sd(.))) %>% 
  select_if(~ !any(is.na(.)))

data <- cbind(model.y, model.x)


data$Family = as.character(data$Family)
data$Genus= as.character(data$Genus)
data$Spp= as.character(data$Spp)
data$DryWeight= as.numeric(data$DryWeight)

Biomass Predict Function

Code
fill_dryweight <- function(data, spp_models, genus_models, family_models, functionalgroup_models) {
  # initialize column with observed values
  data$DryWeight_pred <- data$DryWeight
  
  # loop over rows with missing DryWeight
  for (i in which(is.na(data$DryWeight))) {
    row <- data[i, , drop = FALSE]  # keep it as a dataframe
    pred <- NA
    
    # try species model
    if ("Spp" %in% names(row) && !is.na(row$Spp) && row$Spp %in% names(spp_models)) {
      mod <- spp_models[[row$Spp]]
      pred <- predict(mod, newdata = row)
    }
    
    # fallback: genus model
    if (is.na(pred) && "Genus" %in% names(row) && !is.na(row$Genus) && row$Genus %in% names(genus_models)) {
      mod <- genus_models[[row$Genus]]
      pred <- predict(mod, newdata = row)
    }
    
    # fallback: family model
    if (is.na(pred) && "Family" %in% names(row) && !is.na(row$Family) && row$Family %in% names(family_models)) {
      mod <- family_models[[row$Family]]
      pred <- predict(mod, newdata = row)
    }
    
    # fallback: functional group model
    if (is.na(pred) && "FunctionalGroup" %in% names(row) && !is.na(row$FunctionalGroup) && row$FunctionalGroup %in% names(functionalgroup_models)) {
      mod <- functionalgroup_models[[row$FunctionalGroup]]
      pred <- predict(mod, newdata = row)
    }
    
    # save prediction if found, rounded to 2 decimals
    if (!is.na(pred)) {
      data$DryWeight_pred[i] <- round(pred, 2)
    }
  }
  
  return(data)
}

df <- fill_dryweight(
  data, 
  spp_models, 
  genus_models, 
  family_models, 
  functionalgroup_models
)

predictions <- df %>% 
  select(ID, PlotID, Spp, DryWeight, DryWeight_pred)


percent <- comp %>% 
  select(ID, Percent)

predictions <- predictions %>% 
  left_join(percent, by = "ID")

View(predictions)
#library(writexl)

# save dataframe to Excel
#write_xlsx(predictions, "C:/Users/Alexis Means/Documents/Project/Nutrition Sampling/R code/biomass/24and25predictions.xlsx")