This notebook assembles and cleans heterogeneous livestock-nutrition trials into a single, analysis-ready dataset that can be used to quantify enteric-methane (CH₄) emissions and emission-intensity for cattle, sheep and goats. Using R, we use ERA metadata, harmonise units, fill nutritional gaps (five-step cascade: original paper → Feedipedia → ILRI feeds DB → proximate-composition equation of Weiss & Tebbe → ingredient means), and reconstruct diet-level gross energy (GE) where necessary.
Daily GE intake is converted to annual CH₄ with the IPCC Tier 2 equation. Methane-conversion factors (Ym) follow the 2019 IPCC defaults but are adjusted for diet forage level and dairy productivity where the data allow. Absolute emissions are then paired with weight-gain and milk-/meat-yield records to derive emission-intensity (kg CH₄ kg-product⁻¹). A change-detection routine classifies every treatment diet relative to its control (addition, substitution, dosage change) so diet strategies can be linked directly to shifts in EI.
Ruminants in sub-Saharan Africa often operate far below potential productivity; improving feed quality can therefore lower both absolute and per-kilogram emissions. By turning hundred of individual studies into a coherent Tier-2 dataset, this pipeline provides a transparent basis for (i) benchmarking baseline emissions, (ii) identifying high-leverage nutritional interventions, and (iii) stress-testing alternative CH₄ equations or region-specific Ym values.
A detailed methodological rationale is given in Livestock Emission Method.docx.
This section installs and loads required packages, connects to the S3 bucket containing ERA data, and loads both the livestock and camel metadata into memory.
# Install and load 'pacman' package manager if not already installed
library(ERAg)
library(stringr)
library(knitr)
library(dplyr)
library(DT)
library(readxl)
library(tidyr)
library(arrow)
library(ggplot2)
library(data.table)
library(purrr)
library(janitor)
library(ggalt)
# Set up connection to S3 bucket
s3 <- s3fs::S3FileSystem$new(anonymous = TRUE)
era_s3 <- "s3://digital-atlas/era"
# List the files in the s3 bucket
files <- s3$dir_ls(file.path(era_s3,"data"))
# Identify the latest skinny_cow_2022-YYYY-MM-DD.RData file
files <- tail(grep(".RData", grep("skinny_cow_2022", files, value=TRUE), value=TRUE), 1)
# Download to local if not already present
save_path <- file.path(getwd(), basename(files))
if(!file.exists(save_path)){
s3$file_download(files, save_path, overwrite = TRUE)
}
# Load the data
livestock_metadata <- miceadds::load.Rdata2(file=basename(save_path), path=dirname(save_path))
# Set up connection to S3 bucket for camel data
s3 <- s3fs::S3FileSystem$new(anonymous = TRUE)
era_s3 <- "s3://digital-atlas/era"
# List the files in the s3 bucket
files <- s3$dir_ls(file.path(era_s3,"data"))
# Identify the latest skinny_cow_2022-YYYY-MM-DD.RData file
files <- tail(grep(".RData", grep("courageous_camel_2024", files, value=TRUE), value=TRUE), 1)
# Download to local if not already present
save_path <- file.path(getwd(), basename(files))
if(!file.exists(save_path)){
s3$file_download(files, save_path, overwrite = TRUE)
}
# Load the data
camel <- miceadds::load.Rdata2(file=basename(save_path), path=dirname(save_path))
missing_unit_feed_intake<- camel$Data.Out %>%
select(B.Code, T.Name, ED.Intake.Item,
ED.Mean.T, ED.Error, Out.Unit, Out.Subind,Out.Code.Joined) %>%
filter(Out.Subind == "Feed Intake") %>%
filter(is.na(Out.Unit))
fill in feed intakes
# #Add units where they are NA for Feed-Intake rows
# camel$Data.Out[
# Out.Subind == "Feed Intake" & is.na(Out.Unit), # target rows
# Out.Unit := sub("^.*?\\.\\.(.*?)\\.\\..*$", "\\1", Out.Code.Joined) # extract middle chunk
# ]
#
# feed_intake<-camel$Data.Out%>%
# filter(Out.Subind=="Feed Intake")
#
# sort(unique(feed_intake$Out.Unit), na.last = TRUE) # A-Z, NAs at the end
##### 0. column templates ------------------------------------------------------
diet_cols <- c("B.Code","A.Level.Name","D.Item","D.Type","D.Amount",
"D.Unit.Amount","D.Unit.Time","D.Unit.Animals",
"DC.Is.Dry","D.Ad.lib")
mt_cols <- c("B.Code","A.Level.Name","T.Animals","T.Name","T.Control")
comp_cols <- c("B.Code","D.Item","is_entire_diet",
"DC.Variable","DC.Value","DC.Unit")
digest_cols <- c("B.Code","D.Item","is_entire_diet",
"DD.Variable","DD.Value","DD.Unit")
data_cols <- c("B.Code","A.Level.Name","Out.Code.Joined",
"ED.Intake.Item","ED.Intake.Item.Raw",
"ED.Mean.T","ED.Error","Out.Subind","Out.Unit",
"Out.WG.Start","Out.WG.Unit","is_entire_diet","T.Animals")
prod_cols <- c("B.Code","P.Product")
##### 1. livestock tables (unchanged) ----------------------------------------
diet_livestock <- livestock_metadata$Animals.Diet [ , ..diet_cols ][, Source:="livestock"]
mt_livestock <- livestock_metadata$MT.Out [ , ..mt_cols ][, Source:="livestock"]
comp_livestock <- livestock_metadata$Animals.Diet.Comp [ , ..comp_cols ][, Source:="livestock"]
digest_livestock <- livestock_metadata$Animals.Diet.Digest [ , ..digest_cols][, Source:="livestock"]
data_livestock <- livestock_metadata$Data.Out [ , ..data_cols ][, Source:="livestock"]
prod_livestock <- livestock_metadata$Prod.Out [ , ..prod_cols ][, Source:="livestock"]
##### 2. camel – build one key that links everything --------------------------
## 2·1 T-level information from MT.Out (diet names + control flag)
diet_key <- camel$MT.Out[ , .(B.Code, T.Name, A.Level.Name, T.Control,Herd.Level.Name,Herd.Sublevel.Name)]
## 2·2 Animal numbers & start-weights from Herd.Out
herd_info <- camel$Herd.Out[ ,
.(B.Code,
Herd.Level.Name,
Herd.Sublevel.Name,
Herd.Sex,
Herd.Stage,
T.Animals = Herd.N,
Out.WG.Start = Herd.Start.Weight,
Out.WG.Unit = Herd.Start.Weight.Unit)
]
##### 3. camel tables ----------------------------------------------------------
## 3·1 Animals.Diet
diet_camel <- camel$Animal.Diet[ , ..diet_cols ][ , Source:="camel"]
## 3·2 MT.Out (add T.Animals via herd_info)
mt_camel <- merge(diet_key,
herd_info,
by = c("B.Code","Herd.Level.Name","Herd.Sublevel.Name"),
all.x = TRUE)[, Source := "camel"]
# ..mt_cols **includes T.Animals**
## 3·3 Comp / Digest
comp_camel <- camel$Animal.Diet.Comp[ ,
.(B.Code, D.Item, is_entire_diet,
DC.Variable = DN.Variable,
DC.Value = DN.Value,
DC.Unit = DN.Unit)][, Source:="camel"]
digest_camel <- camel$Animal.Diet.Digest[ ,
.(B.Code, D.Item, is_entire_diet,
DD.Variable, DD.Value, DD.Unit)][, Source:="camel"]
## 3·4 Data.Out (join diet_key → A.Level.Name, then herd_info)
data_camel <- merge(camel$Data.Out, # brings in T.Name*
diet_key, # adds A.Level.Name & control
by = c("B.Code","T.Name"),
all.x = TRUE, sort = FALSE)
data_camel <- merge(data_camel, herd_info, by = c("B.Code","Herd.Level.Name","Herd.Sublevel.Name"),
all.x = TRUE, sort = FALSE)
data_camel <- data_camel[ ,
.(B.Code, A.Level.Name,
ED.Intake.Item,
ED.Intake.Item.Raw = ED.Intake.Item,
ED.Mean.T, ED.Error, Out.Unit, Out.Subind,
Out.WG.Start, Out.WG.Unit,
T.Animals)][, Source:="camel"]
## 3·5 product table
prod_camel <- camel$Herd.Out[ ,
.(B.Code,
P.Product = V.Product,
Herd.Stage)][, Source:="camel"]
##### 4·a. harmonise Pub.Out --------------------------------------------------
# Get all columns from both pub tables
pub_cols <- union(names(livestock_metadata$Pub.Out), names(camel$Pub.Out))
# Add missing columns to each table
for (col in setdiff(pub_cols, names(livestock_metadata$Pub.Out))) {
livestock_metadata$Pub.Out[, (col) := NA]
}
for (col in setdiff(pub_cols, names(camel$Pub.Out))) {
camel$Pub.Out[, (col) := NA]
}
# Reorder columns to match
setcolorder(livestock_metadata$Pub.Out, pub_cols)
setcolorder(camel$Pub.Out, pub_cols)
# Add Source columns (optional, for tracking origin)
livestock_metadata$Pub.Out[, Source := "livestock"]
camel$Pub.Out[, Source := "camel"]
##### 4. bind camel + livestock ----------------------------------------------
diet_all <- rbindlist(list(diet_livestock, diet_camel ), fill = TRUE)
mt_all <- rbindlist(list(mt_livestock, mt_camel ), fill = TRUE)
comp_all <- rbindlist(list(comp_livestock, comp_camel ), fill = TRUE)
digest_all <- rbindlist(list(digest_livestock, digest_camel), fill = TRUE)
data_all <- rbindlist(list(data_livestock, data_camel ), fill = TRUE)
prod_all <- rbindlist(list(prod_livestock, prod_camel ), fill = TRUE)
pub_all <- rbind(livestock_metadata$Pub.Out,camel$Pub.Out)
pub_all <- rbindlist(list(livestock_metadata$Pub.Out, camel$Pub.Out), fill = TRUE)
merged_metadata <- list(
Pub.Out= pub_all,
Animals.Diet = diet_all,
MT.Out = mt_all,
Animals.Diet.Comp = comp_all,
Animals.Diet.Digest = digest_all,
Data.Out = data_all,
Prod.Out = prod_all
)
# Clean up intermediate objects
rm(
diet_cols, mt_cols, comp_cols, digest_cols, data_cols, prod_cols,
diet_livestock, mt_livestock, comp_livestock, digest_livestock, data_livestock, prod_livestock,
diet_camel, mt_camel, comp_camel, digest_camel, data_camel, prod_camel, herd_weight,comp_all,data_all,diet_all,digest_all,mt_all,prod_all,livestock_metadata,camel,herd_info,diet_key)
# (Optional) Run gc() to free memory
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3727077 199.1 7450001 397.9 4703350 251.2
## Vcells 7085788 54.1 16486262 125.8 14341494 109.5
When t control is yes for one of the diets then automatically enter No for the other diets of the same paper. Also for missing t control if the name of the diet contains control or Con or Zero then automatically deduce this is a control diet and others are treatment diets.
MT <- copy(merged_metadata$MT.Out)
MT[, T.Control := tolower(trimws(as.character(T.Control))) ]
MT[
## diet name suggests a control …
grepl("\\bzero\\b|\\bcon\\b|control", A.Level.Name, ignore.case = TRUE) &
## … but flag is still empty / ambiguous
(is.na(T.Control) | T.Control %in% c("0", "false","na")),
T.Control := "yes"
]
MT[, T.Control := {
has_ctrl <- any(T.Control == "yes", na.rm = TRUE) # does study have a control?
if (has_ctrl) {
# 4·a normalise obvious “no” variants
flag <- fifelse(
T.Control %in% c("false", "0", "n", "no"), "no", T.Control
)
# 4·b ensure exactly one category: yes / no
fifelse(flag == "yes", "yes", "no")
} else {
# keep whatever was there (even "0", "false", NA, …)
T.Control
}
}, by = B.Code]
MT[, T.Control := fifelse(
(is.na(T.Control) | T.Control %in% c("", "na")) & any(T.Control == "yes", na.rm = TRUE),
"no",
T.Control
), by = B.Code]
merged_metadata$MT.Out <- MT
controls_multi <- MT[
T.Control == "yes",
.SD[uniqueN(A.Level.Name) > 1],
by = B.Code
]
controls_multi[, .(B.Code, A.Level.Name, T.Control)]
## B.Code A.Level.Name T.Control
## <char> <char> <char>
## 1: HK0287 Conc Commercial...Concentrate HM yes
## 2: HK0287 Conc Commercial yes
## 3: HK0287 Concentrate HM yes
## 4: JS0340 MSP0 Enzyme...MSP0 Yeast yes
## 5: JS0340 MSP0 Enzyme yes
## ---
## 265: JO1088 100g MOBYS RS yes
## 266: JO1088 200g MOBYS RS yes
## 267: JO1088 300g MOBYS RS yes
## 268: JO1095 Diet 1 yes
## 269: JO1095 Diet 5 yes
Some feed intake entries are not matched to any ingredient data due to discrepancies in A.Level.Name between the feed intake and ingredient datasets for the same study codes. In this section, we build a table to flag studies where diet names do not align, helping to identify cases that may require manual review or correction.
# Step 1: Prepare cleaned input
ingredient_levels <- merged_metadata$Animals.Diet %>%
distinct(B.Code, A.Level.Name) %>%
mutate(Source = "Ingredient")
feedintake_levels <- merged_metadata$Data.Out %>%
filter(Out.Subind == "Feed Intake", !is.na(A.Level.Name)) %>%
distinct(B.Code, A.Level.Name) %>%
mutate(Source = "FeedIntake")
# Step 2: Create mismatch table
combined_levels <- bind_rows(ingredient_levels, feedintake_levels)
comparison_table <- combined_levels %>%
group_by(B.Code, A.Level.Name) %>%
summarise(
Sources = paste(sort(unique(Source)), collapse = " & "),
.groups = "drop"
) %>%
mutate(
Match_Status = case_when(
Sources == "FeedIntake & Ingredient" ~ "Match",
Sources == "Ingredient" ~ "Only in Ingredient",
Sources == "FeedIntake" ~ "Only in FeedIntake",
TRUE ~ "Unknown"
)
)
# Step 3: Identify B.Codes with exactly 1 unmatched in both
to_correct <- comparison_table %>%
filter(Match_Status %in% c("Only in Ingredient", "Only in FeedIntake")) %>%
group_by(B.Code) %>%
summarise(
n_ing = sum(Match_Status == "Only in Ingredient"),
n_feed = sum(Match_Status == "Only in FeedIntake"),
Ingredient_Name = A.Level.Name[Match_Status == "Only in Ingredient"][1],
FeedIntake_Name = A.Level.Name[Match_Status == "Only in FeedIntake"][1],
.groups = "drop"
) %>%
filter(n_ing == 1, n_feed == 1)
# Step 4: Apply the corrections to Data.Out (rename unmatched FeedIntake names)
merged_metadata$Data.Out <- merged_metadata$Data.Out %>%
left_join(to_correct, by = c("B.Code", "A.Level.Name" = "FeedIntake_Name")) %>%
mutate(
A.Level.Name = coalesce(Ingredient_Name, A.Level.Name)
) %>%
select(-Ingredient_Name)
rm(ingredient_levels,feedintake_levels,to_correct,combined_levels,comparison_table)
# Get merged stats
merged_stats <- list(
papers = length(unique(merged_metadata$Animals.Diet$B.Code)),
diets = length(unique(merged_metadata$Animals.Diet$A.Level.Name)),
ingredients = length(unique(merged_metadata$Animals.Diet$D.Item))
)
Some ingredients appear multiple times with amounts reported in different units. In this section, we prioritize entries that are already in preferred units to minimize the need for conversions.
## keep ingredient that appear twice with different units only once
# Define your unit preferences
preferred_units <- c("g", "kg")
preferred_time <- "day"
preferred_animals <- "individual"
# Prioritize and deduplicate
merged_metadata$Animals.Diet <- merged_metadata$Animals.Diet %>%
# Add a priority score to sort by best rows
mutate(
priority = case_when(
D.Unit.Amount %in% preferred_units &
D.Unit.Time == preferred_time &
D.Unit.Animals == preferred_animals ~ 1,
TRUE ~ 2 # lower priority
)
) %>%
group_by(B.Code, A.Level.Name, D.Item) %>%
slice_min(order_by = priority, with_ties = FALSE) %>%
ungroup() %>%
select(-priority) # optional: drop helper column
rm(preferred_units,preferred_time,preferred_animals)
### correct extraction errors
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
mutate(
DC.Value = ifelse(
B.Code == "BO1006" &
DC.Variable == "Ash" &
D.Item == "Ration 2" &
as.numeric(DC.Value) == 628,
6.28,
DC.Value
)
)
species_by_bcode <- merged_metadata$Prod.Out %>%
select(B.Code, P.Product) %>%
distinct()
# 2. Papers per species before filtering
# papers_before <- merged_metadata$Animals.Diet %>%
# select(B.Code) %>%
# distinct() %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_before")
# Step 1: Identify A.Level.Name (diet) that include Grazing
grazing_diets <- merged_metadata$Animals.Diet %>%
filter(D.Type == "Grazing") %>%
distinct(B.Code, A.Level.Name)
length(unique(grazing_diets$B.Code))
## [1] 63
# Step 2: Count how many non-grazing diets exist per B.Code
nongrazing_counts <- merged_metadata$Animals.Diet %>%
filter(!(D.Type == "Grazing")) %>%
distinct(B.Code, A.Level.Name) %>%
group_by(B.Code) %>%
summarise(n_nongrazing = n(), .groups = "drop")
length(unique(nongrazing_counts$B.Code))
## [1] 712
# Step 3: Keep only diets that are not marked as Grazing AND are in B.Codes with at least one non-grazing diet
merged_metadata$Animals.Diet <- merged_metadata$Animals.Diet %>%
anti_join(grazing_diets, by = c("B.Code", "A.Level.Name")) %>% # remove all diets that were grazing
semi_join(nongrazing_counts, by = "B.Code") # keep only papers with at least one non-grazing diet
species_by_bcode <- merged_metadata$Prod.Out %>%
select(B.Code, P.Product) %>%
distinct()
# 2. Papers per species before filtering
# papers_after <- merged_metadata$Animals.Diet %>%
# select(B.Code) %>%
# distinct() %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_after")
Some ingredients have more than one type, for those we merge the ingredients with the masterhseet to get the correct D.Type
# # Step 1: Load master sheet
# url <- "https://raw.githubusercontent.com/Mlolita26/ERA_Docs/main/era_master_sheet(7).xlsx"
# temp_file <- tempfile(fileext = ".xlsx")
# download.file(url, destfile = temp_file, mode = "wb")
#
# # Step 2: Read Ani diet and AOM sheets
# ani_diet <- read_excel(temp_file, sheet = "ani_diet")
# aom <- read_excel(temp_file, sheet = "AOM") %>% filter(L5 == "Feed Ingredient")
#
# # Step 3: Clean Ani diet — rename columns and trim whitespace
# ani_diet_clean <- ani_diet %>%
# rename_with(~ str_replace_all(., "\\s+", "_")) %>%
# mutate(across(everything(), ~ str_trim(as.character(.))))
#
# # Step 4: Create long-format lookup for D.Item corrections
# ditem_lookup <- ani_diet_clean %>%
# select(D.Item.Root.Comp.Proc_Major, D.Item, D.Item.Root.Other.Comp.Proc_All) %>%
# pivot_longer(cols = c(D.Item, D.Item.Root.Other.Comp.Proc_All),
# names_to = "Source_Field", values_to = "Name") %>%
# filter(!is.na(Name), !is.na(D.Item.Root.Comp.Proc_Major)) %>%
# separate_rows(Name, sep = "[,;]\\s*") %>% # ← changed line
# mutate(Name = str_to_lower(str_trim(Name)),
# D.Item.Root.Comp.Proc_Major = str_trim(D.Item.Root.Comp.Proc_Major)) %>%
# group_by(Name) %>%
# slice(1) %>%
# ungroup()
#
#
# # Step 5: Apply correction to Animals.Diet using the lookup
# animals_diet_updated <- merged_metadata$Animals.Diet %>%
# mutate(D.Item = str_to_lower(str_trim(D.Item))) %>%
# left_join(ditem_lookup, by = c("D.Item" = "Name")) %>%
# mutate(D.Item = coalesce(D.Item.Root.Comp.Proc_Major, D.Item)) %>%
# select(-D.Item.Root.Comp.Proc_Major)
#
# # Step 6: Clean and prepare AOM mapping
# aom_clean <- aom %>%
# rename_with(~ str_replace_all(., "\\s+", "_")) %>%
# mutate(
# Corrected_Type = ifelse(
# str_to_lower(L6) %in% c("other ingredients", "forage plants") & !is.na(L7),
# L7, # use L7 when rule matches
# L6 # otherwise keep L6
# ),
# Edge_Value = str_trim(Edge_Value),
# Synonym = str_trim(Synonym)
# )
#
# # Step 7: Build mapping table from Edge_Value + Synonym
# direct_match <- aom_clean %>%
# select(D.Item = Edge_Value, Corrected_Type) %>%
# filter(!is.na(D.Item))
#
# synonym_match <- aom_clean %>%
# filter(!is.na(Synonym)) %>%
# separate_rows(Synonym, sep = ";\\s*") %>%
# mutate(Synonym = str_trim(Synonym)) %>%
# select(D.Item = Synonym, Corrected_Type)
#
# aom_mapping <- bind_rows(direct_match, synonym_match) %>%
# mutate(D.Item = str_to_lower(str_trim(D.Item))) %>%
# distinct(D.Item, Corrected_Type)
#
# # Step 8: Join to assign Corrected_Type → D.Type
# animals_diet_updated <- animals_diet_updated %>%
# mutate(D.Item = str_to_lower(str_trim(D.Item))) %>%
# left_join(aom_mapping, by = "D.Item") %>%
# mutate(D.Type = ifelse(!is.na(Corrected_Type), Corrected_Type, D.Type)) %>%
# select(-Corrected_Type)
#
# # # Step 9: Save back to merged metadata
# merged_metadata$Animals.Diet <- animals_diet_updated
# #
# # # Step 10: Optional check — items with no match
# # check <- animals_diet_updated %>%
# # filter(is.na(Corrected_Type) & D.Type != "entire diet")
# #
also when same species so same in mastersheet they should have the same caterogy
those need to be categforized because there is nothing for d item
to_be_categorized<-merged_metadata$Animals.Diet%>%
filter(is.na(D.Item))%>%
filter(D.Type!="Entire Diet")
##1.8) Chnage corn to Maize
# ── Change “Corn” → “Maize” (whole-word match, case-insensitive) ─────────────
fix_corn <- function(df) {
df %>%
mutate(
D.Item = ifelse(
str_detect(trimws(D.Item), regex("^corn$", ignore_case = TRUE)),
"Maize",
D.Item
)
)
}
merged_metadata$Animals.Diet <- fix_corn(merged_metadata$Animals.Diet)
merged_metadata$Animals.Diet.Comp <- fix_corn(merged_metadata$Animals.Diet.Comp)
merged_metadata$Animals.Diet.Digest <- fix_corn(merged_metadata$Animals.Diet.Digest)
# Define species of interest
species_list <- c("Cattle", "Sheep", "Goat")
# Initialize result containers
species_summary <- list()
bcode_table <- list()
# Loop over each species
for (species in species_list) {
# All B.Codes for this species
species_bcodes <- merged_metadata$Prod.Out %>%
filter(P.Product == species) %>%
pull(B.Code) %>%
unique()
# Feed intake B.Codes
feed_data <- merged_metadata$Data.Out %>%
filter(B.Code %in% species_bcodes, !is.na(ED.Mean.T)) %>%
filter(Out.Subind=="Feed Intake")%>%
select(B.Code, D.Item = ED.Intake.Item)
feed_bcodes <- unique(feed_data$B.Code)
# GE B.Codes
ge_data <- merged_metadata$Animals.Diet.Comp %>%
filter(B.Code %in% species_bcodes, DC.Variable == "GE", !is.na(DC.Value)) %>%
select(B.Code, D.Item)
ge_bcodes <- unique(ge_data$B.Code)
# Matched B.Codes (join on both B.Code and D.Item)
matched_bcodes <- inner_join(feed_data, ge_data, by = c("B.Code", "D.Item")) %>%
pull(B.Code) %>%
unique()
# Store species-level summary
species_summary[[species]] <- tibble(
Species = species,
Total_BCodes = length(species_bcodes),
With_FEED = length(feed_bcodes),
With_GE = length(ge_bcodes),
With_BOTH = length(matched_bcodes)
)
# Store long-format bcode list
bcode_table[[species]] <- tibble(
B.Code = c(species_bcodes, feed_bcodes, ge_bcodes, matched_bcodes),
Category = rep(c("All", "With_FEED", "With_GE", "With_BOTH"),
times = c(length(species_bcodes), length(feed_bcodes), length(ge_bcodes), length(matched_bcodes))),
Species = species
) %>% distinct()
}
# Bind results
species_summary_df <- bind_rows(species_summary)
bcode_table_df <- bind_rows(bcode_table)
This step standardizes ingredient amount units (e.g., from mg, kg, %, etc.) to a consistent base (usually grams per individual per day), ensuring reliable downstream calculations and comparisons.
# diet ingredients: breakdown of all ingredients within each diet
ingredients <- merged_metadata$Animals.Diet %>%
dplyr::select(
B.Code,
A.Level.Name,
D.Item,
D.Type,
D.Amount,
D.Unit.Amount,
D.Unit.Time,
D.Unit.Animals,
DC.Is.Dry,
D.Ad.lib
) %>%
mutate(D.Amount = as.numeric(D.Amount))
# Function to harmonize diet ingredient units
harmonize_units <- function(df) {
df %>%
mutate(
# Record original units (optional, for debugging/traceability)
Units = paste(D.Unit.Amount, D.Unit.Time, D.Unit.Animals, sep=";"),
# Harmonize weight and volume units to a consistent base
D.Amount = case_when(
D.Unit.Amount %in% c("kg", "mg", "g/100g", "l", "kg/t", "g/300l",
"mg/kg", "kg/100kg", "kg/100kg body weight",
"kg/kg metabolic weight", "g/kg metabolic weight (0.75)",
"g/kg DMI", "g/L", "g/kg DM") ~ {
case_when(
D.Unit.Amount == "kg" ~ D.Amount * 1000, # kg -> g
D.Unit.Amount == "mg" ~ D.Amount / 1000, # mg -> g
D.Unit.Amount == "g/100g" ~ D.Amount * 10, # g/100g -> g/kg
D.Unit.Amount == "l" ~ D.Amount * 1000, # l -> ml
D.Unit.Amount == "kg/t" ~ D.Amount * 1000, # kg/t -> g/kg
D.Unit.Amount == "g/300l" ~ D.Amount / 300, # g/300l -> g/L
D.Unit.Amount == "mg/kg" ~ D.Amount / 1000, # mg/kg -> g/kg
D.Unit.Amount %in% c("kg/100kg", "kg/100kg body weight") ~ D.Amount * 10, # -> g/kg body weight
D.Unit.Amount == "kg/kg metabolic weight" ~ D.Amount * 1000, # -> g/kg metabolic weight
D.Unit.Amount %in% c("g/kg body weight (0.75)", "g/kg Body Weight (0.75)") ~ D.Amount, # unchanged, just normalize name
D.Unit.Amount == "g/kg DMI" ~ D.Amount,
D.Unit.Amount == "g/kg DM" ~ D.Amount,
TRUE ~ D.Amount
)
},
TRUE ~ D.Amount
),
# Update D.Unit.Amount to reflect new base
D.Unit.Amount = case_when(
D.Unit.Amount == "kg" ~ "g",
D.Unit.Amount == "mg" ~ "g",
D.Unit.Amount == "g/100g" ~ "g/kg",
D.Unit.Amount == "l" ~ "ml",
D.Unit.Amount == "kg/t" ~ "g/kg",
D.Unit.Amount == "g/300l" ~ "g/L",
D.Unit.Amount == "mg/kg" ~ "g/kg",
D.Unit.Amount %in% c("kg/100kg", "kg/100kg body weight") ~ "g/kg body weight",
D.Unit.Amount %in% c("g/kg body weight (0.75)", "g/kg Body Weight (0.75)") ~ "g/kg metabolic weight",
D.Unit.Amount == "kg/kg metabolic weight" ~ "g/kg metabolic weight",
D.Unit.Amount == "g/kg metabolic weight (0.75)" ~ "g/kg metabolic weight",
D.Unit.Amount == "g/kg DMI" ~ "g/kg",
D.Unit.Amount == "g/kg DM" ~ "g/kg",
D.Unit.Amount == "g/L" ~ "g/L",
D.Unit.Amount == "g/kg Body Weight" ~ "g/kg body weight", # case fix
TRUE ~ D.Unit.Amount
),
# Harmonize time units to day
D.Amount = case_when(
D.Unit.Time == "week" ~ D.Amount / 7,
D.Unit.Time == "month" ~ D.Amount / 30,
D.Unit.Time == "2 weeks" ~ D.Amount / 14,
D.Unit.Time == "4 days interval" ~ D.Amount / 4,
D.Unit.Time == "2x/day" ~ D.Amount * 2,
TRUE ~ D.Amount
),
D.Unit.Time = ifelse(!is.na(D.Unit.Time), "day", D.Unit.Time)
) %>%
# Set unit fields to NA if amount is NA
mutate(
D.Unit.Amount = ifelse(is.na(D.Amount), NA_character_, D.Unit.Amount),
D.Unit.Time = ifelse(is.na(D.Amount), NA_character_, D.Unit.Time),
D.Unit.Animals = ifelse(is.na(D.Amount), NA_character_, D.Unit.Animals)
)
}
# Apply the harmonization
ingredients <- harmonize_units(ingredients)
# Load replicate info to adjust for "all animals in replicate"
reps <- merged_metadata$MT.Out %>%
mutate(T.Animals = as.numeric(T.Animals)) %>%
distinct(B.Code, A.Level.Name, .keep_all = TRUE)
ingredients <- ingredients %>%
left_join(reps %>% dplyr::select(B.Code, A.Level.Name, T.Animals), by=c("B.Code","A.Level.Name")) %>%
mutate(
D.Amount = case_when(
D.Unit.Animals == "all animals in replicate" & !is.na(T.Animals) ~ D.Amount / T.Animals,
TRUE ~ D.Amount
),
D.Unit.Animals = ifelse(D.Unit.Animals == "all animals in replicate", "individual", D.Unit.Animals)
)
# Refresh 'Units' column after these adjustments (optional)
ingredients <- ingredients %>%
mutate(Units = paste(D.Unit.Amount, D.Unit.Time, D.Unit.Animals, sep=";"))
rm(harmonize_units)
This step identifies ingredients where DC.Is.Dry is marked as NA or unspecified. We cross-check these with moisture values from the composition table to infer whether the ingredient is likely dry or wet. We also flag inconsistencies between the ingredient name (e.g., containing “dried”) and the recorded dry status.
# --- 1. Classify ingredients from Moisture or DM values ---
dry_classified <- bind_rows(
# Moisture-based rules
merged_metadata$Animals.Diet.Comp %>%
filter(str_to_lower(DC.Variable) %in% c("moisture", "moist")) %>%
mutate(
Predictor = "Moisture",
Value = as.numeric(DC.Value),
Unit = str_to_lower(DC.Unit),
Dry_Prediction = case_when(
Unit %in% c("%", "% dm", "g/100g") & Value < 15 ~ TRUE,
Unit %in% c("%", "% dm", "g/100g") & Value > 50 ~ FALSE,
Unit == "g/kg" & Value < 150 ~ TRUE,
Unit == "g/kg" & Value > 500 ~ FALSE,
TRUE ~ NA
)
),
# DM-based rules
merged_metadata$Animals.Diet.Comp %>%
filter(str_to_lower(DC.Variable) == "dm") %>%
mutate(
Predictor = "DM",
Value = as.numeric(DC.Value),
Unit = str_to_lower(DC.Unit),
Dry_Prediction = case_when(
Unit %in% c("%", "% dm", "g/100g") & Value > 85 ~ TRUE,
Unit %in% c("%", "% dm", "g/100g") & Value < 50 ~ FALSE,
Unit == "g/kg" & Value > 850 ~ TRUE,
Unit == "g/kg" & Value < 500 ~ FALSE,
TRUE ~ NA
)
)
) %>%
select(B.Code, D.Item, Predictor, Dry_Prediction) %>%
distinct() %>%
group_by(B.Code, D.Item, Predictor) %>%
summarise(Dry_Prediction = first(na.omit(Dry_Prediction)), .groups = "drop") %>%
pivot_wider(names_from = Predictor, values_from = Dry_Prediction, names_prefix = "Dry_Prediction_")
# --- 2. Detect "dry" or "dried" in name ---
dry_name_match <- ingredients %>%
mutate(
name_lower = str_to_lower(D.Item),
Dry_by_name = str_detect(
name_lower,
"\\bdry\\b|\\bdried\\b|\\bmeal\\b|\\bground\\b|\\bmilled\\b|\\bflour\\b"
) & !str_detect(name_lower, "fish meal boiled")
) %>%
select(B.Code, A.Level.Name,D.Item, Dry_by_name)
extra_dry <- ingredients %>%
filter(
D.Type %in% c("Non-ERA Feed", "Other Feed","Other Ingredients","Supplement"),
!str_detect(str_to_lower(D.Item), "water|oil")
) %>%
select(B.Code,A.Level.Name, D.Item)
# Remove A.Level.Name from extra_dry before combining
extra_dry <- extra_dry %>%
select(B.Code, D.Item) %>%
distinct() %>%
mutate(DC.Is.Dry = "Yes")
# --- 3. Combine predictions ---
# Combine dry predictions from DM/moisture, name-based detection, and extra rules
dry_prediction_final <- bind_rows(
dry_classified %>%
mutate(Prediction_Source = "Moisture_or_DM") %>%
filter(Dry_Prediction_Moisture == TRUE | Dry_Prediction_DM == TRUE) %>%
select(B.Code, D.Item) %>%
mutate(DC.Is.Dry = "Yes"),
dry_name_match %>%
filter(Dry_by_name) %>%
select(B.Code, D.Item) %>%
mutate(DC.Is.Dry = "Yes", Prediction_Source = "Name_match"),
extra_dry %>%
select(B.Code, D.Item) %>%
mutate(DC.Is.Dry = "Yes", Prediction_Source = "Extra_rule")
) %>%
distinct(B.Code, D.Item, .keep_all = TRUE)
# --- 4. Apply predictions to ingredients table (update only predicted dry) ---
# 1. Join on D.Item
ingredients <- ingredients %>%
left_join(dry_prediction_final, by = c("B.Code", "D.Item"), suffix = c("", ".from_DItem"))
# 2. Join also on A.Level.Name
ingredients <- ingredients %>%
left_join(
dry_prediction_final %>%
rename(A.Level.Name = D.Item),
by = c("B.Code", "A.Level.Name"),
suffix = c("", ".from_AName")
)
# 3. Apply logic: keep existing "Yes", but if original is "No" or NA, check if any of the predictions is "Yes"
ingredients <- ingredients %>%
mutate(
DC.Is.Dry = case_when(
(is.na(DC.Is.Dry) | str_to_lower(DC.Is.Dry) %in% c("no","unspecified")) &
(DC.Is.Dry.from_DItem == "Yes" | DC.Is.Dry.from_AName == "Yes") ~ "Yes",
TRUE ~ DC.Is.Dry
)
) %>%
select(-DC.Is.Dry.from_DItem, -DC.Is.Dry.from_AName)
#take out water
ingredients<-ingredients%>%
filter(D.Item != "Water" | is.na(D.Item))
rm(dry_classified,dry_name_match,dry_prediction_final,extra_dry)
Subset for Dry data
When feed intake data is missing and we rely on ingredient-level amounts, we need to ensure that the quantities used refer to the dry form of the ingredient. To do this, we subset the data to include only ingredients identified as dried—based on either the declared DC.Is.Dry value or our earlier flagging using ingredient names and moisture content predictions. This ensures consistency and accuracy in downstream calculations.
#take out unspecified ingredients
species_by_bcode <- merged_metadata$Prod.Out %>%
select(B.Code, P.Product) %>%
distinct()
# 2. Papers per species before filtering
# papers_before <- ingredients %>%
# select(B.Code) %>%
# distinct() %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_before")
# length(unique(ingredients$B.Code))
# Step 1: Identify A.Level.Name groups with at least one ingredient NOT dry
non_dry_diets <- ingredients %>%
filter(is.na(DC.Is.Dry) | str_to_lower(DC.Is.Dry) %in% c("no", "n", "unspecified")) %>%
distinct(B.Code, A.Level.Name)
# Step 2: Filter out these diets entirely from ingredients
ingredients <- ingredients %>%
anti_join(non_dry_diets, by = c("B.Code", "A.Level.Name"))
length(unique(ingredients$A.Level.Name))
## [1] 1339
rm(non_dry_diets)
length(unique(ingredients$B.Code))
## [1] 492
species_by_bcode <- merged_metadata$Prod.Out %>%
select(B.Code, P.Product) %>%
distinct()
# 2. Papers per species before filtering
# papers_after <- ingredients %>%
# select(B.Code) %>%
# distinct() %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_after")
We filter the dataset to retain only ingredients with acceptable units. Uncommon or unexpected unit formats are excluded to ensure consistency and simplify downstream processing
# Define a list of acceptable units
acceptable_units <- c(
"g/kg", "g", "%", "ml", "L", "kg", "g/L",
"g/kg metabolic weight", "% Diet",
"g/kg body weight", "ml/kg", "% Body Mass",
"% Concentrate", "g/kg body weight" # leave only one if repeated
)
# Filter rows to keep only acceptable units
ingredients <- ingredients %>%
filter(D.Unit.Amount %in% acceptable_units | is.na(D.Unit.Amount)) %>%
mutate(D.Item = ifelse(is.na(D.Item), "Unspecified", D.Item))
#tracking paper loss because of harmnozied units
#total_studies_harmonized<- length(unique(ingredients_harmonized$B.Code))
#total_ingredients_harmonized<- length(unique(ingredients_harmonized$D.Item))
#total_diets_harmonized<- length(unique(ingredients_harmonized$A.Level.Name))
rm(acceptable_units)
# Pre-filter Data.Out to keep only the first non-NA weight per group
weights_unique <- merged_metadata$Data.Out %>%
filter(Out.Subind == "Weight Gain") %>%
mutate(
Out.WG.Unit = tolower(Out.WG.Unit),
Out.WG.Start = case_when(
Out.WG.Unit == "g" ~ Out.WG.Start / 1000,
Out.WG.Unit == "mg" ~ Out.WG.Start / 1e6,
Out.WG.Unit %in% c("kg", "unspecified", NA) ~ Out.WG.Start,
TRUE ~ NA_real_
),
Out.WG.Unit = ifelse(!is.na(Out.WG.Start), "kg", NA_character_)
) %>%
filter(!is.na(Out.WG.Start)) %>%
group_by(B.Code, A.Level.Name) %>%
summarise(
Out.WG.Start = mean(Out.WG.Start, na.rm = TRUE),
Out.WG.Unit = "kg",
.groups = "drop"
)
# Step 3: Join body weight and keep only ingredients with weight
ingredients <- ingredients %>%
left_join(
weights_unique,
by = c("B.Code", "A.Level.Name"))
# Step 4: Convert D.Amount to grams based on body weight and update unit accordingly
ingredients <- ingredients %>%
mutate(
# Recalculate D.Amount using body weight or metabolic weight, result in kg
D.Amount = case_when(
D.Unit.Amount == "g/kg body weight" & !is.na(Out.WG.Start) ~ (D.Amount * Out.WG.Start) / 1000,
D.Unit.Amount == "g/kg metabolic weight" & !is.na(Out.WG.Start) ~ (D.Amount * Out.WG.Start^0.75) / 1000,
D.Unit.Amount == "% Body Mass" & !is.na(Out.WG.Start) ~ ((D.Amount / 100) * Out.WG.Start),
TRUE ~ D.Amount # unchanged if not convertible
),
# Update unit to "kg" if recalculated
D.Unit.Amount = case_when(
D.Unit.Amount %in% c("g/kg body weight", "g/kg metabolic weight", "% Body Mass") &
!is.na(Out.WG.Start) ~ "kg",
TRUE ~ D.Unit.Amount
)
)
For ingredients reported in relative units such as %, % Diet, or g/kg, we can estimate their absolute amounts when total diet feed intake is available. This allows us to convert proportions into standardized quantities.
missing_unit_feed_intake<- merged_metadata$Data.Out %>%
select(B.Code, A.Level.Name, ED.Intake.Item, ED.Intake.Item.Raw,
ED.Mean.T, ED.Error, Out.Unit, Out.Subind,is_entire_diet,Source,Out.Code.Joined) %>%
filter(Out.Subind == "Feed Intake") %>%
filter(is.na(Out.Unit))
feed_intake <- merged_metadata$Data.Out %>%
select(B.Code, A.Level.Name, ED.Intake.Item, ED.Intake.Item.Raw,
ED.Mean.T, ED.Error, Out.Unit, Out.Subind,is_entire_diet,T.Animals) %>%
filter(Out.Subind == "Feed Intake") %>%
filter(!is.na(ED.Mean.T))%>%
mutate(
Out.Unit = tolower(trimws(Out.Unit)),
# Extract unit components
Out.Unit.Animals = str_extract(Out.Unit, "\\b(individual|replicate|animal replicate)\\b"),
Out.Unit.Time = str_extract(Out.Unit, "\\b(day|d|yr|year)\\b"),
Out.Unit.Amount = Out.Unit %>%
str_remove_all("\\b(individual|replicate|animal replicate)\\b") %>%
str_remove_all("\\b(day|d|yr|year)\\b") %>%
str_replace_all("/+", "/") %>%
str_trim() %>%
str_remove("/$"))
# Step 3: Convert replicate-level intake to individual-level when possible
feed_intake <- feed_intake %>%
mutate(
# Ensure numeric type
ED.Mean.T = as.numeric(ED.Mean.T),
T.Animals = as.numeric(T.Animals),
# Convert intake and update unit
ED.Mean.T = case_when(
Out.Unit.Animals == "replicate" & !is.na(T.Animals) & T.Animals > 0 ~ ED.Mean.T / T.Animals,
TRUE ~ ED.Mean.T
),
Out.Unit.Animals = case_when(
Out.Unit.Animals == "replicate" & !is.na(T.Animals) & T.Animals > 0 ~ "individual",
TRUE ~ Out.Unit.Animals
)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `T.Animals = as.numeric(T.Animals)`.
## Caused by warning:
## ! NAs introduced by coercion
# Define preferred units
preferred_units <- c("kg dm", "g dm")
preferred_time <- "day"
preferred_animals <- "individual"
# Create filtered table with prioritization
feed_intake <- feed_intake %>%
mutate(
priority = case_when(
Out.Unit.Amount %in% preferred_units &
Out.Unit.Time == preferred_time &
Out.Unit.Animals == preferred_animals &
!is.na(ED.Error) ~ 1,
Out.Unit.Amount %in% preferred_units &
Out.Unit.Time == preferred_time &
Out.Unit.Animals == preferred_animals ~ 2,
TRUE ~ 3
)
) %>%
group_by(B.Code, A.Level.Name, ED.Intake.Item) %>%
slice_min(order_by = priority, with_ties = FALSE) %>%
ungroup() %>%
select(-priority) # optional
### keep unique units of ingredients
# Step 3: Join body weight
feed_intake <- feed_intake %>%
left_join(
weights_unique %>%
select(B.Code, A.Level.Name, Out.WG.Start, Out.WG.Unit),
by = c("B.Code", "A.Level.Name")
) %>%
group_by(A.Level.Name) %>%
mutate(
Out.WG.Start = ifelse(is.na(Out.WG.Start),
first(na.omit(Out.WG.Start)),
Out.WG.Start)
) %>%
ungroup() %>%
distinct()
# Step 4: Convert ED.Mean.T to kg/day
feed_intake <- feed_intake %>%
mutate(
Out.Unit = trimws(tolower(Out.Unit)),
Out.Unit.Amount = trimws(str_remove(Out.Unit.Amount, "/$")),
ED.Mean.T = case_when(
Out.Unit.Amount %in% c("g", "g dm") ~ ED.Mean.T / 1000,
Out.Unit.Amount %in% c("kg", "kg dm") ~ ED.Mean.T,
Out.Unit.Amount == "g dm/100 g" ~ ED.Mean.T * 10 / 1000,
Out.Unit.Amount == "g/week" ~ ED.Mean.T / 7 / 1000,
Out.Unit.Amount == "g dm/kg metabolic weight" ~ (ED.Mean.T * Out.WG.Start^0.75) / 1000,
Out.Unit.Amount == "g dm/kg body weight" ~ (ED.Mean.T * Out.WG.Start) / 1000,
Out.Unit.Amount == "mg" ~ ED.Mean.T / 1e6,
Out.Unit.Amount %in% c("g/kg body weight", "g dm/kg body weight") ~ (ED.Mean.T * Out.WG.Start) / 1000,
Out.Unit.Amount %in% c("g/kg body weight (0.75)", "g dm/kg body weight (0.75)") ~ (ED.Mean.T * Out.WG.Start^0.75) / 1000,
Out.Unit.Amount %in% c("g/kg metabolic weight","g dm/kg metabolic weight") ~ (ED.Mean.T * Out.WG.Start^0.75) / 1000,
Out.Unit.Amount == "kg dm/kg metabolic weight" ~ ED.Mean.T * Out.WG.Start^0.75,
Out.Unit.Amount %in% c("% body weight", "dm % body weight") ~ (ED.Mean.T / 100) * Out.WG.Start,
Out.Unit.Amount == "g/g body weight" ~ (ED.Mean.T * Out.WG.Start) / 1000,
TRUE ~ NA_real_
),
Out.Unit.Amount = case_when(
Out.Unit.Amount %in% c("g", "g dm", "g dm/100 g", "g/week", "g/kg body weight",
"g dm/kg body weight", "g/kg body weight (0.75)",
"g dm/kg body weight (0.75)", "g/kg metabolic weight","g dm/kg metabolic weight","g/g body weight",
"kg dm/kg metabolic weight", "% body weight",
"dm % body weight", "kg", "kg dm", "mg") ~ "kg",
TRUE ~ Out.Unit.Amount
)
) %>%
select(-Out.Unit)
feed_intake <- feed_intake %>%
group_by(B.Code, A.Level.Name, ED.Intake.Item.Raw) %>%
arrange(
desc(!is.na(ED.Mean.T)),
desc(Out.Unit.Amount == "kg"),
desc(Out.Unit.Animals == "individual"),
desc(Out.Unit.Time == "day")
) %>%
slice(1) %>%
ungroup()
Performs basic outlier checks on feed intake data, identifying suspiciously high or low intake values based on predefined unit thresholds.
# Assuming your data is stored in a data frame named `feed_intake_data`
# Filter for units that imply kg/day/individual and check if ED.Mean.T > 50
# Identify suspicious feed intake values
## ---- detect_suspicious_feed_intake -----------------------------------------
# Thresholds
high_kg <- 50 # > 50 kg d-1 animal-1 ⟹ unrealistically high
low_kg <- 0.1 # < 0.1 kg d-1 animal-1 ⟹ unrealistically low
low_g_min <- 1 # 1–20 g range unlikely for daily intake
low_g_max <- 20
suspicious_feed_intake <- feed_intake %>%
mutate(
flag = case_when(
Out.Unit.Amount %in% c("kg", "kg dm") &
Out.Unit.Time == "day" &
Out.Unit.Animals == "individual" & ED.Mean.T > high_kg ~ "too_high_kg",
Out.Unit.Amount %in% c("kg", "kg dm") &
Out.Unit.Time == "day" &
Out.Unit.Animals == "individual" & ED.Mean.T < low_kg ~ "too_low_kg",
Out.Unit.Amount %in% c("g", "g dm") &
ED.Mean.T >= low_g_min & ED.Mean.T <= low_g_max ~ "implausibly_low_g",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(flag)) %>%
select(B.Code, A.Level.Name, ED.Mean.T,
Out.Unit.Amount, Out.Unit.Time, Out.Unit.Animals, flag)
print(suspicious_feed_intake)
## # A tibble: 579 × 7
## B.Code A.Level.Name ED.Mean.T Out.Unit.Amount Out.Unit.Time Out.Unit.Animals
## <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 AG0051 NG 0.0083 kg day individual
## 2 AG0051 NGD 0.0082 kg day individual
## 3 AG0051 PG 0.0091 kg day individual
## 4 AG0051 PGD 0.0092 kg day individual
## 5 AN0007 T1 0.0696 kg day individual
## 6 AN0007 T2 0.0735 kg day individual
## 7 AN0007 T3 0.0808 kg day individual
## 8 AN0007 T4 0.0769 kg day individual
## 9 AN0007 T5 0.0802 kg day individual
## 10 AN0020 Control T1 0.0503 kg day individual
## # ℹ 569 more rows
## # ℹ 1 more variable: flag <chr>
low_intake_codes <- feed_intake %>%
filter(
# (a) < 0.1 kg / day / individual
(Out.Unit.Amount %in% c("kg", "kg dm") &
Out.Unit.Time == "day" &
Out.Unit.Animals == "individual" &
ED.Mean.T < 0.1) |
# (b) 1–20 g reported in gram units
(Out.Unit.Amount %in% c("g", "g dm") &
ED.Mean.T >= 1 & ED.Mean.T <= 20)
) %>%
distinct(B.Code) %>% # keep each code only once
pull() # return as a plain character vector
low_intake_codes
## [1] "AG0051" "AN0007" "AN0020" "B01063" "BO1012"
## [6] "BO1015" "BO1030" "BO1042" "BO1048" "BO1061"
## [11] "BO1062" "BO1064" "BO1075" "BO1095" "CJ1001"
## [16] "CJ1006" "CJ1015" "DK0037" "EM1049" "EM1060"
## [21] "EM1062" "EM1065" "EM1066" "EM1068" "EM1069"
## [26] "EM1070" "EM1077" "EM1081" "EM1092" "EM1097"
## [31] "EM1100" "EO0032" "EO0047" "EO0086" "EO0102"
## [36] "HK0005" "HK0007" "HK0011" "HK0014" "HK0017"
## [41] "HK0037" "HK0045" "HK0066.2" "HK0067" "HK0102.2"
## [46] "HK0118" "HK0228" "HK0260" "HK0319" "HK0331"
## [51] "HK0333" "HK0337" "JO1001" "JO1010" "JO1015"
## [56] "JO1020" "JO1022" "JO1023" "JO1025" "JO1026"
## [61] "JO1027" "JO1033" "JO1034" "JO1036" "JO1038"
## [66] "JO1040" "JO1041" "JO1042" "JO1050" "JO1057"
## [71] "JO1068" "JO1069" "JO1070" "JO1071" "JO1074"
## [76] "JO1075" "JO1077" "JO1078" "JS0047" "JS0048"
## [81] "JS0076" "JS0079.1" "JS0093" "JS0095" "JS0123"
## [86] "JS0124" "JS0125" "JS0134" "JS0135" "JS0139"
## [91] "JS0177" "JS0192" "JS0193" "JS0206" "JS0207"
## [96] "JS0208" "JS0209" "JS0281" "JS0291" "JS0295"
## [101] "JS0296" "JS0309" "JS0342" "JS0370" "JS0371"
## [106] "JS0393" "LM0120" "LM0266" "LM0277.1" "LM0289"
## [111] "LM0310" "LM0312" "LM0334" "LM0335" "LM0354"
## [116] "NJ0003" "NJ0047" "NN0021" "NN0083" "NN0085"
## [121] "NN0098" "NN0108" "NN0114" "NN0115" "NN0118"
## [126] "NN0119.2" "NN0189" "NN0198" "NN0201" "NN0205"
## [131] "NN0211" "NN0212" "NN0214" "NN0230" "NN0251"
## [136] "NN0272.1" "NN0272.1.2" "NN0272.2" "NN0321" "NN0325"
## [141] "NN0326" "NN0358" "NN0359" "NN0367" "NN0377"
## [146] "NN0396"
Some papers were identified with errors. This section corrects those error if they are still present before the next update of the data.
# Step: Convert suspiciously high kg values that are likely mislabelled g
feed_intake <- feed_intake %>%
mutate(
Out.Unit.Amount = case_when(
Out.Unit.Amount == "kg" & Out.Unit.Animals == "individual" & Out.Unit.Time == "day" & ED.Mean.T > 50 ~ "g", # correct mislabeling
TRUE ~ Out.Unit.Amount
)
)
If feed intake data is there for the entire diet and that the ingredients in ingredients dataset are in % or % of diet or g/kg we can convert
# Step 5: Estimate ingredient absolute amounts for diets with "Entire Diet" intake
ingredients <- ingredients %>%
# Join only feed intake values where ED.Intake.Item is "Entire Diet"
left_join(
feed_intake %>%
filter(ED.Intake.Item == "Entire Diet") %>%
select(B.Code, A.Level.Name, Intake_kg_day = ED.Mean.T),
by = c("B.Code", "A.Level.Name")
) %>%
mutate(
# Estimate absolute amount (in grams)
D.Amount = case_when(
D.Unit.Amount == "%" & !is.na(Intake_kg_day) ~ (D.Amount / 100) * Intake_kg_day * 1000,
D.Unit.Amount == "% Diet" & !is.na(Intake_kg_day) ~ (D.Amount / 100) * Intake_kg_day * 1000,
D.Unit.Amount == "g/kg" & !is.na(Intake_kg_day) ~ D.Amount * Intake_kg_day,
TRUE ~ D.Amount
),
# Update unit only if we did a conversion
D.Unit.Amount = case_when(
D.Unit.Amount %in% c("%", "% Diet", "g/kg") & !is.na(Intake_kg_day) ~ "g",
TRUE ~ D.Unit.Amount
)
) %>%
select(-Intake_kg_day)
We excluded the basal diets because, in most of the livestock trials in our database, the basal ration is offered ad libitum and therefore lacks two pieces of information that are essential for a fair, productivity-based comparison: (i) actual feed-intake measurements and (ii) corresponding enteric-methane estimates. Without intake data we cannot convert emissions—or, in many cases, even estimate them—on a per-kilogram-of-product basis, and retaining these records would artificially inflate the number of “treatments” while contributing no usable information to the efficiency analyses. By focusing on the control diet (with measured intake) and the formulated treatment diets, we keep a coherent set of observations in which every data point can be expressed as both absolute emissions and emission intensity, allowing robust, like-for-like comparisons across studies.
ingredients<-ingredients%>%
filter(A.Level.Name!="Base")
We prioritize feed intake data over the amounts reported in the methods section, as feed intake is the key variable needed for emission calculations. When feed intake data is unavailable, we fall back on the harmonized ingredient amounts extracted earlier.
# Step 5: Remove feed intake entries that are actually full-diet labels
feed_intake_ingredients <- feed_intake %>%
filter(str_trim(ED.Intake.Item) != str_trim(A.Level.Name) &
str_trim(ED.Intake.Item) != "Entire Diet") %>%
filter(!is.na(ED.Mean.T))
# Step 6: Merge with ingredients and override with ED.Mean.T when available
# FIXED VERSION
# Merge with ingredients and override with ED.Mean.T when available
ingredients <- ingredients %>%
left_join(
feed_intake_ingredients %>%
select(B.Code, A.Level.Name, ED.Intake.Item, ED.Mean.T, Out.Unit.Amount, Out.Unit.Animals, Out.Unit.Time),
by = c("B.Code", "A.Level.Name", "D.Item" = "ED.Intake.Item")
) %>%
mutate(
from_feed_intake = !is.na(ED.Mean.T),
D.Amount = if_else(from_feed_intake, ED.Mean.T, D.Amount),
D.Unit.Amount = if_else(from_feed_intake & !is.na(Out.Unit.Amount), Out.Unit.Amount, D.Unit.Amount),
D.Unit.Animals = if_else(from_feed_intake & !is.na(Out.Unit.Animals), Out.Unit.Animals, D.Unit.Animals),
D.Unit.Time = if_else(from_feed_intake & !is.na(Out.Unit.Time), Out.Unit.Time, D.Unit.Time)
) %>%
select(-ED.Mean.T, -Out.Unit.Amount, -Out.Unit.Animals, -Out.Unit.Time) %>%
distinct()
# Step 8: Final conversion of unit to grams (g)
ingredients <- ingredients %>%
mutate(
D.Amount = case_when(
D.Unit.Amount == "kg" ~ D.Amount * 1000,
TRUE ~ D.Amount
),
D.Unit.Amount = case_when(
D.Unit.Amount == "kg" ~ "g",
TRUE ~ D.Unit.Amount
)
)
TO do : See BO1070 we want the full diet to be included from feed intake which is not the case now see feed intake data the goal would be to add rows in ingredient to add info at the entire diet level have a close look to make it the same format as entire diets already in ingredients
# Step: Extract feed intake entries that refer to the entire diet
feed_intake_diet <- feed_intake %>%
filter(ED.Intake.Item == A.Level.Name | ED.Intake.Item == "Entire Diet")
# Step: Merge and overwrite ingredients using diet-level feed intake
ingredients <- ingredients %>%
left_join(
feed_intake_diet %>%
select(B.Code, A.Level.Name, ED.Intake.Item, ED.Intake.Item.Raw, ED.Mean.T, Out.Unit.Amount, Out.Unit.Animals, Out.Unit.Time),
by = c("B.Code", "A.Level.Name", "D.Type" = "ED.Intake.Item.Raw")
) %>%
mutate(
from_feed_intake2 = !is.na(ED.Mean.T),
D.Amount = if_else(from_feed_intake2, ED.Mean.T, D.Amount),
D.Unit.Amount = if_else(from_feed_intake2 & !is.na(Out.Unit.Amount), Out.Unit.Amount, D.Unit.Amount),
D.Unit.Animals = if_else(from_feed_intake2 & !is.na(Out.Unit.Animals), Out.Unit.Animals, D.Unit.Animals),
D.Unit.Time = if_else(from_feed_intake2 & !is.na(Out.Unit.Time), Out.Unit.Time, D.Unit.Time)
) %>%
select(-ED.Mean.T, -Out.Unit.Amount, -Out.Unit.Animals, -Out.Unit.Time,-ED.Intake.Item) %>%
distinct()
# Step 8: Final conversion of unit to grams (g)
ingredients <- ingredients %>%
mutate(
D.Amount = case_when(
D.Unit.Amount == "kg" ~ D.Amount * 1000,
TRUE ~ D.Amount
),
D.Unit.Amount = case_when(
D.Unit.Amount == "kg" ~ "g",
TRUE ~ D.Unit.Amount
)
)
Not all ingredients and diets are currently included in the digestibility and composition tables—only those for which data is available. We aim to add the missing ones to help complete the dataset and enable more comprehensive analysis.
# --- Step 1: Get all unique (B.Code, D.Item, Source) from the Diet table ---
all_diet_items <- merged_metadata$Animals.Diet %>%
distinct(B.Code, A.Level.Name, D.Item, Source) %>%
mutate(
is_entire_diet = if_else(is.na(D.Item), TRUE, FALSE),
D.Item = coalesce(D.Item, A.Level.Name)
) %>%
select(B.Code, D.Item, is_entire_diet, Source) %>%
distinct()
# --- Step 2: Get unique variables in the nutrition and digestibility datasets ---
nutrition_variables <- unique(na.omit(merged_metadata$Animals.Diet.Comp$DC.Variable))
digestibility_variables <- unique(na.omit(merged_metadata$Animals.Diet.Digest$DD.Variable))
# --- Step 3: Expand to full grid of all D.Item × variables (by B.Code and Source) ---
# Nutrition (Comp)
comp_missing <- all_diet_items %>%
crossing(DC.Variable = nutrition_variables) %>%
anti_join(
merged_metadata$Animals.Diet.Comp %>% select(B.Code, D.Item, DC.Variable),
by = c("B.Code", "D.Item", "DC.Variable")
) %>%
mutate(
DC.Value = NA_real_,
DC.Unit = NA_character_
)
# Digestibility (Digest)
digest_missing <- all_diet_items %>%
crossing(DD.Variable = digestibility_variables) %>%
anti_join(
merged_metadata$Animals.Diet.Digest %>% select(B.Code, D.Item, DD.Variable),
by = c("B.Code", "D.Item", "DD.Variable")
) %>%
mutate(
DD.Value = NA_real_,
DD.Unit = NA_character_
)
# --- Step 4: Bind the missing values into the existing datasets ---
merged_metadata$Animals.Diet.Comp <- bind_rows(merged_metadata$Animals.Diet.Comp, comp_missing)
merged_metadata$Animals.Diet.Digest <- bind_rows(merged_metadata$Animals.Diet.Digest, digest_missing)
# --- Step 5: Ensure each ingredient has a row for each nutrition variable ---
comp_completion <- merged_metadata$Animals.Diet.Comp %>%
distinct(B.Code, D.Item, is_entire_diet, Source) %>%
crossing(DC.Variable = nutrition_variables) %>%
anti_join(
merged_metadata$Animals.Diet.Comp %>% select(B.Code, D.Item, DC.Variable),
by = c("B.Code", "D.Item", "DC.Variable")
) %>%
mutate(
DC.Value = NA_real_,
DC.Unit = NA_character_
)
# --- Step 6: Ensure each ingredient has a row for each digestibility variable ---
digest_completion <- merged_metadata$Animals.Diet.Digest %>%
distinct(B.Code, D.Item, is_entire_diet, Source) %>%
crossing(DD.Variable = digestibility_variables) %>%
anti_join(
merged_metadata$Animals.Diet.Digest %>% select(B.Code, D.Item, DD.Variable),
by = c("B.Code", "D.Item", "DD.Variable")
) %>%
mutate(
DD.Value = NA_real_,
DD.Unit = NA_character_
)
# --- Step 7: Bind additional completion rows ---
merged_metadata$Animals.Diet.Comp <- bind_rows(merged_metadata$Animals.Diet.Comp, comp_completion)
merged_metadata$Animals.Diet.Digest <- bind_rows(merged_metadata$Animals.Diet.Digest, digest_completion)
rm(comp_completion,comp_missing,digest_completion,digest_missing)
# Step 1: Get species for each B.Code
species_by_bcode <- merged_metadata$Prod.Out %>%
distinct(B.Code, P.Product)
# Step 2: Filter Comp table for GE values
# ge_papers <- merged_metadata$Animals.Diet.Comp %>%
# filter(DC.Variable == "GE", !is.na(DC.Value)) %>%
# distinct(B.Code) %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_with_GE")
#
# # View result
# ge_papers
# ── 2. Build a “variable → units” summary table ───────────────────────────────
table_units <- merged_metadata$Animals.Diet.Comp %>%
# keep rows where a unit is actually recorded
filter(!is.na(DC.Unit) & DC.Unit != "NA" & str_trim(DC.Unit) != "") %>%
# one row per unique (variable, unit) pair
distinct(DC.Variable, DC.Unit) %>%
# collapse multiple units for the same variable into a single comma-separated
group_by(DC.Variable) %>%
summarise(units = paste(sort(unique(DC.Unit)), collapse = ", "),
.groups = "drop") %>%
arrange(DC.Variable)
# ── 3. Inspect / export ───────────────────────────────────────────────────────
print(table_units)
## # A tibble: 62 × 2
## DC.Variable units
## <chr> <chr>
## 1 ADF %, % DM, g, g/100g, g/100g DM, g/kg, g/kg DM
## 2 ADL %, % DM, g/100g, g/100g DM, g/kg, g/kg DM
## 3 Ash %, % DM, g/100g, g/100g DM, g/kg, g/kg DM, Unspecified
## 4 CF %, % DM, g/kg, g/kg DM
## 5 CFbr %, % DM, g/100g DM, g/kg, g/kg DM
## 6 CFibre %, g/100g, g/100g DM, g/kg, g/kg(Prosopis)/ % (diet), Unspecified
## 7 CH %, % DM, g/kg DM
## 8 CP %, % DM, %/(g/100g for pumpkin leaves), g, g/100g, g/100g DM, g/…
## 9 CT %, % DM, g/kg, g/kg DM, mg/g
## 10 Ca %, % DM, g/100g, g/kg, g/kg DM
## # ℹ 52 more rows
# write_csv(table_units, "variable_units_lookup.csv") # if you want a file
###############################################################################
## Fix the mis-labelled GE unit for four specific ingredients
## *and* only in the paper NN0106
###############################################################################
bad_items <- c(
"Biscuit Waste Meal Dried(Sun)",
"Leucaena leucocephala Leaves Dried",
"Maize",
"Wheat Offal"
)
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
mutate(
DC.Unit = if_else(
D.Item %in% bad_items & # right ingredient name
B.Code == "NN0106" & # right paper / source
DC.Variable == "GE" & # only Gross Energy rows
DC.Unit == "kcal/g", # only if label is wrong
"kcal/kg", # corrected label
DC.Unit
)
)
animals <- merged_metadata$Animals.Diet.Comp %>% mutate(value_num = suppressWarnings(as.numeric(DC.Value)))
# 1. nutrient (% / g-based) ➜ g / kg DM ========================================
vars_gkg <- c("NDF", "EE", "CP", "Ash")
conv_gkg <- tribble(
~DC.Unit, ~factor,
"%", 10,
"% DM", 10,
"g/100g", 10,
"g/100g DM", 10,
"g/kg", 1,
"g/kg DM", 1
)
animals <- animals %>%
left_join(conv_gkg, by = "DC.Unit") %>%
mutate(
value_std_gkg = if_else(DC.Variable %in% vars_gkg & !is.na(factor),
value_num * factor, NA_real_),
unit_std_gkg = if_else(!is.na(value_std_gkg), "g/kg", NA_character_)
) %>%
select(-factor)
# 2. energy ➜ MJ / kg (DM) =====================================================
vars_MJ <- c("GE", "ME", "NE")
conv_MJ <- tribble(
~DC.Unit, ~factorMJ,
"MJ/kg", 1,
"MJ/kg DM", 1,
"MJ/100g", 10,
"MJ/100g DM", 10,
"MJ/g", 1e3,
"kJ/kg", 0.001,
"kJ/kg DM", 0.001,
"kJ/100g", 0.01,
"kJ/100g DM", 0.01,
"kJ/g", 1,
"J/mg", 1, # J per mg ×1e-6 MJ/J ×1e6 mg/kg
"kcal/kg", 0.004184,
"kcal/kg DM", 0.004184,
"kcal/100g", 0.04184,
"kcal/g", 4.184,
"Mcal/kg", 4.184,
"Mcal/kg DM", 4.184
)
animals <- animals %>%
left_join(conv_MJ, by = "DC.Unit") %>%
mutate(
value_std_MJ = if_else(DC.Variable %in% vars_MJ & !is.na(factorMJ),
value_num * factorMJ, NA_real_),
unit_std_MJ = if_else(!is.na(value_std_MJ), "MJ/kg", NA_character_)
) %>%
select(-factorMJ)
# 3. overwrite DC.Value / DC.Unit where a conversion succeeded -----------------
animals <- animals %>%
mutate(
DC.Value = coalesce(value_std_gkg, value_std_MJ, value_num) |> format(scientific = FALSE),
DC.Unit = coalesce(unit_std_gkg, unit_std_MJ, DC.Unit)
) %>%
select(-value_num, -value_std_gkg, -value_std_MJ,
-unit_std_gkg, -unit_std_MJ)
# 4. save back into the metadata list ------------------------------------------
merged_metadata$Animals.Diet.Comp <- animals
# ── END CHUNK ──────────────────────────────────────────────────────────────────
coverage_raw <- merged_metadata$Animals.Diet.Comp %>%
filter(!is.na(DC.Variable)) %>%
mutate(
# Clean and detect actual missing values
DC.Value_clean = str_trim(as.character(DC.Value)),
DC.Value_clean = na_if(DC.Value_clean, ""),
DC.Value_clean = na_if(DC.Value_clean, "NA"),
DC.Value_clean = na_if(DC.Value_clean, "na")
) %>%
group_by(DC.Variable) %>%
summarise(
n_total = n(),
n_non_missing = sum(!is.na(DC.Value_clean)),
coverage_percent = round(100 * n_non_missing / n_total, 1),
.groups = "drop"
) %>%
arrange(desc(coverage_percent))
Feedipedia is an online database that provides comprehensive information on feed ingredients used in livestock nutrition. It aggregates data on the nutritional composition, digestibility, and other relevant properties of various feeds.We will use it to infer data when info is missing from our data extraction. Links each ingredient to its corresponding entry in Feedipedia, enabling supplementation of missing nutritional and digestibility data such as GE, CP, EE, and Ash.
## download data from feedapedia (generated with the "feedapedia scrapping script available on github)
# Use the raw URL of the file on GitHub
url <- "https://github.com/ERAgriculture/ERL/raw/main/downloaded_data/feedipedia.parquet"
# Create a temporary file location with a .parquet extension
temp_file <- tempfile(fileext = ".parquet")
# Download the file in binary mode
download.file(url, destfile = temp_file, mode = "wb")
# Read the Parquet file
feedipedia <- read_parquet(temp_file)
## download AOM with the correspondance between feedipedia codes and diet items
# Define the raw URL for the Excel file
url <- "https://raw.githubusercontent.com/Mlolita26/ERA_Docs/main/era_master_sheet(7).xlsx"
# Download the file to a temporary location
temp_file <- tempfile(fileext = ".xlsx")
download.file(url, destfile = temp_file, mode = "wb")
# Read only the sheet called "AOM"
mastersheet <- read_excel(temp_file, sheet = "AOM") %>%
filter(L5=="Feed Ingredient")
mastersheet$Feedipedia <- as.numeric(sub(".*node/", "", mastersheet$Feedipedia))
feedipedia_unit_audit <- feedipedia %>%
mutate(
Variable = as.character(Variable),
Unit = trimws(Unit)
) %>%
group_by(Variable, Unit) %>%
summarise(n_rows = n(), .groups = "drop") %>%
arrange(Variable, desc(n_rows)) %>%
group_by(Variable) %>%
mutate(
n_units = n(),
flag = if_else(n_units > 1, "⚠︎ multiple units", "")
) %>%
ungroup()
# ── wide human-friendly table ---------------------------------------------
unit_summary <- feedipedia_unit_audit %>%
unite(unit_n, Unit, n_rows, sep = " (n = ") %>%
mutate(unit_n = paste0(unit_n, ")")) %>%
group_by(Variable, flag) %>%
summarise(units = paste(unit_n, collapse = "; "), .groups = "drop") %>%
arrange(Variable)
###7.1) Harmonize feedipedia units
# helper -------------------------------------------------------------
to_canonical <- function(val, unit, vclass){
u <- str_trim(tolower(unit))
nu <- unit # keep the original unit by default
v <- val # keep the original value by default
if (vclass == "energy"){ # ---------- ENERGY ----------
if (u %in% c("mcal/kg dm","mcal/kg","mcal/kg d.m.")){
v <- val * 4.184 ; nu <- "MJ/kg"
} else if (u %in% c("kcal/kg dm","kcal/kg")){
v <- val * 0.004184 ; nu <- "MJ/kg"
} else if (u %in% c("mj/kg","mj/kg dm")){
nu <- "MJ/kg"
}
} else { # -------- NUTRIENT ----------
if (u %in% c("% dm","%","% as fed")){
v <- val * 10 ; nu <- "g/kg"
} else if (u %in% c("mg/kg dm","mg/kg")){
v <- val / 1000 ; nu <- "g/kg"
} else if (u %in% c("g/100g dm","g/100g")){
v <- val * 10 ; nu <- "g/kg"
} else if (u == "g dm"){
nu <- "g/kg"
}
}
list(val = v, unit = nu)
}
# ── 0 · variable lists ──────────────────────────────────────────────────────
nutrition_vars <- str_trim(c(
"ADF", "Crude protein", "Ether extract", "Ash","NDF"
))
energy_vars <- c( # ✱ NEW: long names added here ✱
"GE", "Gross energy",
"ME", "Metabolizable energy",
"NE",
"AME", "AMEn", "TME", "TMEn",
"ME ruminants", "ME ruminants (FAO, 1982)", "ME ruminants (gas production)",
"MEn growing pig", "MEn rabbit",
"NE growing pig"
)
# ── 1 · conversion ─────────────────────────────────────────────────────────
feedipedia <- feedipedia %>%
rowwise() %>%
mutate(
conv = list(
if (Variable %in% c(nutrition_vars, energy_vars)) # ← look in BOTH lists
to_canonical(
Avg, Unit,
if_else(Variable %in% energy_vars, "energy", "nutrient")
)
else list(val = Avg, unit = Unit) # leave untouched
)
) %>%
tidyr::unnest_wider(conv) %>%
mutate(Avg = val, Unit = unit) %>%
select(-val, -unit) %>%
ungroup()
# --- A) Add Feedipedia node from mastersheet into Comp and Digest tables
add_feedipedia_node <- function(df, mastersheet) {
df %>%
left_join(
mastersheet %>% select(Edge_Value, Feedipedia),
by = c("D.Item" = "Edge_Value")
) %>%
distinct()
}
merged_metadata$Animals.Diet.Comp <- add_feedipedia_node(merged_metadata$Animals.Diet.Comp, mastersheet)
merged_metadata$Animals.Diet.Digest <- add_feedipedia_node(merged_metadata$Animals.Diet.Digest, mastersheet)
# --- B) Standardize Feedipedia variables and assign categories
digestibility_keywords <- c(
"digestibility", "degradability", "energy digestibility",
"DE", "ME", "NE", "AMEn", "OM digestibility", "DM digestibility",
"ruminants", "rabbit", "pig", "poultry", "horse", "salmonids", "hydrolysis"
)
feedipedia <- feedipedia %>%
mutate(
Variable = case_when(
tolower(Variable) == "gross energy" ~ "GE",
tolower(Variable) == "crude protein" ~ "CP",
tolower(Variable) == "ether extract" ~ "EE",
tolower(Variable) == "ash" ~ "Ash",
tolower(Variable) == "neutral detergent fibre
" ~ "NDF",
TRUE ~ Variable
),
category = case_when(
Variable %in% c("GE", "CP", "EE", "Ash","NDF") ~ "nutrition",
str_detect(Variable, regex(paste(digestibility_keywords, collapse = "|"), ignore_case = TRUE)) ~ "digestibility",
TRUE ~ "nutrition"
),
feedipedia_node = as.numeric(feedipedia_node)
)
# --- C) Ensure source columns exist
if (!"nutrition_source" %in% names(merged_metadata$Animals.Diet.Comp)) {
merged_metadata$Animals.Diet.Comp$nutrition_source <- NA_character_
}
if (!"digestibility_source" %in% names(merged_metadata$Animals.Diet.Digest)) {
merged_metadata$Animals.Diet.Digest$digestibility_source <- NA_character_
}
# --- D) Nutrition variables: GE, CP, EE, Ash
nutrition_vars <- c("GE", "CP", "EE", "Ash","NDF")
feedipedia_nutrition <- feedipedia %>%
filter(Variable %in% nutrition_vars) %>%
group_by(feedipedia_node, Variable, Unit) %>%
summarise(Value = mean(Avg, na.rm = TRUE), .groups = "drop") %>%
rename(DC.Variable = Variable, DC.Value.feedipedia = Value)
# --- E) Join nutrition into Animals.Diet.Comp
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
mutate(DC.Value = suppressWarnings(as.numeric(DC.Value))) %>%
left_join(
feedipedia_nutrition,
by = c("Feedipedia" = "feedipedia_node", "DC.Variable")
) %>%
mutate(
# 1. If DC.Value is missing *and* Feedipedia has a value,
# copy the Feedipedia unit; otherwise keep the existing one
DC.Unit = if_else(is.na(DC.Value) & !is.na(DC.Value.feedipedia),
Unit, # unit from Feedipedia
DC.Unit), # keep existing
# 2. Now copy the value itself
DC.Value = coalesce(DC.Value, DC.Value.feedipedia),
# 3. Flag the source when we used Feedipedia
nutrition_source = if_else(
is.na(nutrition_source) & !is.na(DC.Value.feedipedia),
"feedipedia", nutrition_source)
) %>%
select(-DC.Value.feedipedia, -Unit)
# --- F) Digestibility: Energy digestibility, ruminants → DE
feedipedia_digestibility <- feedipedia %>%
filter(str_detect(tolower(Variable), "energy digestibility")) %>%
mutate(
DD.Variable = "DE", # Map all energy digestibility to "DE"
feedipedia_node = as.numeric(feedipedia_node)
) %>%
group_by(feedipedia_node, DD.Variable, Unit) %>%
summarise(DD.Value.feedipedia = mean(Avg, na.rm = TRUE), .groups = "drop")
merged_metadata$Animals.Diet.Digest <- merged_metadata$Animals.Diet.Digest %>%
mutate(Feedipedia = as.numeric(Feedipedia)) %>%
left_join(feedipedia_digestibility, by = c("Feedipedia" = "feedipedia_node", "DD.Variable")) %>%
mutate(
value_missing = is.na(DD.Value),
DD.Value = coalesce(DD.Value, DD.Value.feedipedia),
DD.Unit = if_else(value_missing & !is.na(DD.Value.feedipedia), Unit, DD.Unit),
digestibility_source = if_else(value_missing & !is.na(DD.Value.feedipedia), "feedipedia", digestibility_source)
) %>%
select(-DD.Value.feedipedia, -Unit, -value_missing)
# Step 1: Get species for each B.Code
species_by_bcode <- merged_metadata$Prod.Out %>%
distinct(B.Code, P.Product)
# # Step 2: Filter Comp table for GE values
# ge_papers <- merged_metadata$Animals.Diet.Comp %>%
# filter(DC.Variable == "GE", !is.na(DC.Value)) %>%
# distinct(B.Code) %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_with_GE")
#
# # View result
# ge_papers
Now that we have both raw and Feedipedia data, we can use them to infer missing values. If some instances of an ingredient have nutritional or digestibility data while others do not, we can impute the missing values using the average of the available ones.
# --- Standardize Units in Animals.Diet.Comp ---
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
mutate(
# 1) normalise text --------------------------------------------------------
DC.Unit = tolower(trimws(DC.Unit)),
DC.Value = as.numeric(DC.Value)
) %>%
# 2) convert values ----------------------------------------------------------
mutate(
DC.Value = case_when(
# --- gross energy → MJ kg-1 -------------------------------------------
DC.Variable == "GE" ~ case_when(
DC.Unit %in% c("mj/kg", "mj/kg dm") ~ DC.Value,
DC.Unit %in% c("kcal/kg", "kcal/kg dm") ~ DC.Value * 0.004184,
DC.Unit == "kcal/g" ~ DC.Value * 4.184,
DC.Unit == "kcal/100g" ~ DC.Value * 0.04184, # fixed
DC.Unit == "cal/g" ~ DC.Value * 0.004184, # fixed
DC.Unit == "mcal/kg" ~ DC.Value * 4.184,
DC.Unit %in% c("kj/kg", "kj/kg dm") ~ DC.Value / 1000,
DC.Unit == "kj/g" ~ DC.Value, # already MJ/kg
DC.Unit %in% c("kj/100g", "kj/100g dm", "kj/100 g dm") ~ DC.Value * 0.01,
DC.Unit == "mj/100g" ~ DC.Value * 10,
DC.Unit == "j/mg" ~ DC.Value, # fixed
TRUE ~ NA_real_
),
# --- everything else → g kg-1 -----------------------------------------
TRUE ~ case_when(
DC.Unit %in% c("%", "% dm", "% as fed", "g/100g", "g/100g dm") ~ DC.Value * 10,
DC.Unit %in% c("g", "g/kg", "g/kg dm") ~ DC.Value,
DC.Unit %in% c("kg", "kg/day") ~ DC.Value * 1000,
TRUE ~ NA_real_
)
),
# 3) harmonised unit labels ----------------------------------------------
DC.Unit = if_else(DC.Variable == "GE" & !is.na(DC.Value), "MJ/kg", "g/kg")
)
# --- Compute Mean Value Per Ingredient and Variable ---
ingredient_means <- merged_metadata$Animals.Diet.Comp %>%
filter(is_entire_diet == FALSE, !is.na(DC.Value)) %>%
group_by(D.Item, DC.Variable) %>%
summarise(mean_value = mean(DC.Value, na.rm = TRUE), .groups = "drop")
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
left_join(ingredient_means, by = c("D.Item", "DC.Variable")) %>%
mutate(
nutrition_source = case_when(
nutrition_source == "feedipedia" ~ "feedipedia", # preserve Feedipedia
is.na(DC.Value) & !is_entire_diet & !is.na(mean_value) ~ "inferred_from_same_ingredient", # imputed
is.na(nutrition_source) & !is.na(DC.Value) ~ "raw data", # only if there's a value
TRUE ~ nutrition_source # keep whatever is there
),
DC.Value = if_else(
is.na(DC.Value) & !is_entire_diet & !is.na(mean_value),
mean_value,
DC.Value
)
) %>%
select(-mean_value)
# --- B) Compute Mean Digestibility Per Ingredient and Variable ---
digest_means <- merged_metadata$Animals.Diet.Digest %>%
filter(DD.Variable == "DE", is_entire_diet == FALSE, !is.na(DD.Value)) %>%
group_by(D.Item, DD.Variable, DD.Unit) %>%
summarise(mean_digest = mean(DD.Value, na.rm = TRUE), .groups = "drop")
# --- C) Fill Missing Digestibility Values with Ingredient Mean ---
merged_metadata$Animals.Diet.Digest <- merged_metadata$Animals.Diet.Digest %>%
left_join(digest_means, by = c("D.Item", "DD.Variable")) %>%
mutate(
digestibility_source = case_when(
digestibility_source == "feedipedia" ~ "feedipedia",
is.na(DD.Value) & !is_entire_diet & !is.na(mean_digest) ~ "inferred_from_same_ingredient",
is.na(digestibility_source) ~ "raw data",
TRUE ~ digestibility_source
),
DD.Value = if_else(
is.na(DD.Value) & !is_entire_diet & !is.na(mean_digest),
mean_digest,
DD.Value
)
) %>%
select(-mean_digest)
since there is only very few digestibility in the raw data logic that we dont gain any data by doing this because feedipedia is infered for all the ingredients we have the info for.
# ✅ Count how many papers had GE filled by Feedipedia, and how many of those are Cattle / Sheep / Goat
# Step 1: Filter for Feedipedia-filled GE entries
ge_filled_by_feedipedia <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable == "GE", nutrition_source == "feedipedia") %>%
distinct(B.Code)
# Step 2: Total number of studies
n_ge_feedipedia <- nrow(ge_filled_by_feedipedia)
# Step 3: Add species info and filter to ruminants
ge_species_feedipedia <- ge_filled_by_feedipedia %>%
left_join(merged_metadata$`Prod.Out` %>% select(B.Code, P.Product), by = "B.Code") %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product)
# Step 4: Count per species
# ge_species_counts <- ge_species_feedipedia %>%
# count(P.Product, name = "Papers_with_GE_from_Feedipedia")
#
# # Display summary
# cat("📌 Total papers with GE filled from Feedipedia:", n_ge_feedipedia, "\n")
# cat("📌 Of those, species breakdown:\n")
# print(ge_species_counts)
# Step 1: Get species for each B.Code
species_by_bcode <- merged_metadata$Prod.Out %>%
distinct(B.Code, P.Product)
# # Step 2: Filter Comp table for GE values
# ge_papers <- merged_metadata$Animals.Diet.Comp %>%
# filter(DC.Variable == "GE", !is.na(DC.Value)) %>%
# distinct(B.Code) %>%
# left_join(species_by_bcode, by = "B.Code") %>%
# count(P.Product, name = "papers_with_GE")
#
# # View result
# ge_papers
# Define species of interest
species_list <- c("Cattle", "Sheep", "Goat")
# Initialize result containers
species_summary <- list()
bcode_table <- list()
# Loop over each species
for (species in species_list) {
# All B.Codes for this species
species_bcodes <- merged_metadata$Prod.Out %>%
filter(P.Product == species) %>%
pull(B.Code) %>%
unique()
# Feed intake B.Codes
feed_data <- merged_metadata$Data.Out %>%
filter(B.Code %in% species_bcodes, !is.na(ED.Mean.T)) %>%
select(B.Code, D.Item = ED.Intake.Item)
feed_bcodes <- unique(feed_data$B.Code)
# GE B.Codes
ge_data <- merged_metadata$Animals.Diet.Comp %>%
filter(B.Code %in% species_bcodes, DC.Variable == "GE", !is.na(DC.Value)) %>%
select(B.Code, D.Item)
ge_bcodes <- unique(ge_data$B.Code)
# Matched B.Codes (join on both B.Code and D.Item)
matched_bcodes <- inner_join(feed_data, ge_data, by = c("B.Code", "D.Item")) %>%
pull(B.Code) %>%
unique()
# Store species-level summary
species_summary[[species]] <- tibble(
Species = species,
Total_BCodes = length(species_bcodes),
With_FEED = length(feed_bcodes),
With_GE = length(ge_bcodes),
With_BOTH = length(matched_bcodes)
)
# Store long-format bcode list
bcode_table[[species]] <- tibble(
B.Code = c(species_bcodes, feed_bcodes, ge_bcodes, matched_bcodes),
Category = rep(c("All", "With_FEED", "With_GE", "With_BOTH"),
times = c(length(species_bcodes), length(feed_bcodes), length(ge_bcodes), length(matched_bcodes))),
Species = species
) %>% distinct()
}
# Bind results
species_summary_df <- bind_rows(species_summary)
bcode_table_df <- bind_rows(bcode_table)
# Show results
print(species_summary_df)
## # A tibble: 3 × 5
## Species Total_BCodes With_FEED With_GE With_BOTH
## <chr> <int> <int> <int> <int>
## 1 Cattle 127 127 104 23
## 2 Sheep 222 222 195 66
## 3 Goat 186 185 162 39
head(bcode_table_df)
## # A tibble: 6 × 3
## B.Code Category Species
## <chr> <chr> <chr>
## 1 NN0188 All Cattle
## 2 NN0293 All Cattle
## 3 AG0018 All Cattle
## 4 AG0051 All Cattle
## 5 EO0041 All Cattle
## 6 EO0067 All Cattle
###7.3 Coverage after Feedipedia
coverage_feedipedia <- merged_metadata$Animals.Diet.Comp %>%
filter(!is.na(DC.Variable)) %>%
mutate(
# Clean and detect actual missing values
DC.Value_clean = str_trim(as.character(DC.Value)),
DC.Value_clean = na_if(DC.Value_clean, ""),
DC.Value_clean = na_if(DC.Value_clean, "NA"),
DC.Value_clean = na_if(DC.Value_clean, "na")
) %>%
group_by(DC.Variable) %>%
summarise(
n_total = n(),
n_non_missing = sum(!is.na(DC.Value_clean)),
coverage_percent = round(100 * n_non_missing / n_total, 1),
.groups = "drop"
) %>%
arrange(desc(coverage_percent))
## Link data to ILRI database
# Define the raw URL for the Excel file
url <- "https://raw.githubusercontent.com/Mlolita26/ERA_Docs/main/era_master_sheet(9).xlsx"
# Download the file to a temporary location
temp_file <- tempfile(fileext = ".xlsx")
download.file(url, destfile = temp_file, mode = "wb")
# ---- Load ILRI nutrition data ----
ILRI <- read_excel(temp_file, sheet = "ssa_feedsdb", col_types = "text") %>%
janitor::clean_names()
## New names:
## • `ilri_feedname` -> `ilri_feedname...4`
## • `ilri_feedtype` -> `ilri_feedtype...5`
## • `ilri_feedcode` -> `ilri_feedcode...6`
## • `` -> `...27`
## • `ilri_feedcode` -> `ilri_feedcode...28`
## • `ilri_feedname` -> `ilri_feedname...29`
## • `ilri_feedtype` -> `ilri_feedtype...30`
names(ILRI) <- gsub("\\.\\.\\.[0-9]+", "", names(ILRI))
# Reshape ILRI data and standardize units
ilri_long <- ILRI %>%
select(ilri_feedcode = ilri_feedcode_6, cp, om, ndf, adf, adl, dm, me, n_el) %>%
pivot_longer(
cols = -ilri_feedcode,
names_to = "DC.Variable",
values_to = "ILRI_Value"
) %>%
mutate(
DC.Variable = toupper(DC.Variable),
ILRI_Value = case_when(
DC.Variable %in% c("ME", "NEL", "NEM", "NEG") ~ as.numeric(ILRI_Value),
TRUE ~ as.numeric(ILRI_Value) * 10 # convert % DM to g/kg DM
),
ILRI_Unit = case_when(
DC.Variable %in% c("ME", "NEL", "NEM", "NEG") ~ "MJ/kg DM",
TRUE ~ "g/kg DM"
)
)
# ---- Load and clean AOM mapping sheet ----
aom_map <- read_excel(temp_file, sheet = "AOM", col_types = "text") %>%
janitor::clean_names() %>%
filter(l5 == "Feed Ingredient") %>%
select(edge_value, ilri_code) %>%
mutate(ilri_code = as.character(ilri_code))
# Find feeds present in both ILRI and AOM based on ILRI code
common_feeds <- inner_join(
ilri_long %>% distinct(ilri_feedcode),
aom_map %>% distinct(ilri_code),
by = c("ilri_feedcode" = "ilri_code")
)
# ---- Merge ILRI feed codes into diet composition ----
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
left_join(aom_map, by = c("D.Item" = "edge_value"))%>%
distinct()
## Warning in left_join(., aom_map, by = c(D.Item = "edge_value")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 612 of `x` matches multiple rows in `y`.
## ℹ Row 967 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
join nutritional values
# ---- Join nutritional values from ILRI ----
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
left_join(ilri_long, by = c("ilri_code" = "ilri_feedcode", "DC.Variable"))%>%
distinct()
## Warning in left_join(., ilri_long, by = c(ilri_code = "ilri_feedcode", "DC.Variable")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 5718 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
fill missing values from ILRI database
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
mutate(
to_fill = is.na(DC.Value) & !is.na(ILRI_Value),
DC.Value = if_else(to_fill, ILRI_Value, DC.Value),
DC.Unit = if_else(to_fill, ILRI_Unit, DC.Unit),
nutrition_source = if_else(to_fill, "ilri", nutrition_source)
) %>%
select(-ILRI_Value, -ILRI_Unit, -to_fill)
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
mutate(
DC.Unit = case_when(
DC.Unit == "g/kg DM" ~ "g/kg",
DC.Unit == "MJ/kg DM" ~ "MJ/kg",
TRUE ~ DC.Unit
)
)%>%
distinct()
coverage_ILRI <- merged_metadata$Animals.Diet.Comp %>%
filter(!is.na(DC.Variable)) %>%
mutate(
# Clean and detect actual missing values
DC.Value_clean = str_trim(as.character(DC.Value)),
DC.Value_clean = na_if(DC.Value_clean, ""),
DC.Value_clean = na_if(DC.Value_clean, "NA"),
DC.Value_clean = na_if(DC.Value_clean, "na")
) %>%
group_by(DC.Variable) %>%
summarise(
n_total = n(),
n_non_missing = sum(!is.na(DC.Value_clean)),
coverage_percent = round(100 * n_non_missing / n_total, 1),
.groups = "drop"
) %>%
arrange(desc(coverage_percent))
coverage_summary <- bind_rows(
coverage_raw %>% mutate(Source = "raw"),
coverage_feedipedia %>% mutate(Source = "feedipedia"),
coverage_ILRI %>% mutate(Source = "ILRI")
)
# Pivot to wide format: one row per variable, columns for each source
coverage_summary_wide <- coverage_summary %>%
select(DC.Variable, Source, coverage_percent) %>%
pivot_wider(names_from = Source, values_from = coverage_percent)
# Optional: sort by raw coverage
coverage_summary_wide <- coverage_summary_wide %>%
arrange(desc(raw))
##— build the control key from your merged MT.Out
ctrl_key <- merged_metadata$MT.Out[, .(
B.Code,
A.Level.Name,
T.Name,
T.Control
)]
##— example join back onto your ingredient table
# (assuming your ingredients DT has B.Code + A.Level.Name)
ingredients <- merge(
x = ingredients,
y = ctrl_key,
by = c("B.Code","A.Level.Name"),
all.x = TRUE,
sort = FALSE
)
This table presents the relative percentage of each ingredient within a diet. To ensure accurate proportion calculations, we first filtered the dataset to include only diets with consistent units. Diets with incompatible or non-harmonizable units were excluded, as their proportions could not be reliably determined.
# (a) Ingredient-based totals ----------------------------------------------------
consistent_diets <- ingredients %>% # ignore "Entire Diet" rows
filter(D.Type != "Entire Diet") %>%
group_by(B.Code, A.Level.Name) %>%
mutate(Unit_Count = n_distinct(D.Unit.Amount)) %>% # always 1 after conversion
filter(Unit_Count == 1) %>% # keep only single-unit diets
ungroup()
diet_completeness <- consistent_diets %>%
filter(D.Type != "Entire Diet") %>%
group_by(B.Code, A.Level.Name) %>%
summarise(
n_total = n(),
n_nonmissing = sum(!is.na(D.Amount)),
.groups = "drop"
) %>%
filter(n_total == n_nonmissing)
total_ingr <- consistent_diets %>%
semi_join(diet_completeness, by = c("B.Code", "A.Level.Name")) %>%
group_by(B.Code, A.Level.Name, D.Unit.Amount) %>%
summarise(Total_Amount_ingr = sum(D.Amount), .groups = "drop")
# (b) Feed-intake figures that already represent the whole diet -----------------
feed_intake_diet_g <- feed_intake_diet %>%
mutate(
# convert the numeric amount ---------------------------------------------
Intake_Amount_g = case_when(
Out.Unit.Amount == "kg" ~ ED.Mean.T * 1000, # kg → g
Out.Unit.Amount == "g" ~ ED.Mean.T, # already g
TRUE ~ NA_real_ # any other unit → NA
),
# overwrite the *unit* once it has been converted -------------------------
Out.Unit.Amount = case_when(
Out.Unit.Amount %in% c("kg", "g") ~ "g", # now everything is g
TRUE ~ Out.Unit.Amount
)
) %>%
rename(D.Unit.Amount = Out.Unit.Amount) %>% # unify the column name
select(B.Code, A.Level.Name, D.Unit.Amount, Intake_Amount_g)
total_amounts <- full_join(
feed_intake_diet_g,
total_ingr,
by = c("B.Code", "A.Level.Name", "D.Unit.Amount")
) %>%
mutate(
Total_Amount = coalesce(Intake_Amount_g, Total_Amount_ingr)
) %>%
select(B.Code, A.Level.Name, D.Unit.Amount, Total_Amount)
# ────────────────────────────────────────────────────────────────────────────────
# 2. Check unit correspondence -------------------------------------------------
unit_check <- ingredients %>%
select(B.Code, A.Level.Name, Ingredient_Unit = D.Unit.Amount) %>%
distinct() %>%
left_join(
total_amounts %>%
select(B.Code, A.Level.Name, Total_Unit = D.Unit.Amount),
by = c("B.Code", "A.Level.Name")
) %>%
mutate(Consistent_Units = Ingredient_Unit == Total_Unit)
# View the correspondence table (interactive)
datatable(
unit_check,
options = list(pageLength = 10, scrollX = TRUE, autoWidth = TRUE),
caption = "Unit correspondence between ingredient rows and diet totals"
)
# Keep only diets whose units are consistent
consistent_unit_diets <- unit_check %>%
filter(Consistent_Units) %>%
select(B.Code, A.Level.Name)
ingredients_clean <- ingredients %>%
semi_join(consistent_unit_diets, by = c("B.Code", "A.Level.Name"))
totals_clean <- total_amounts %>%
semi_join(consistent_unit_diets, by = c("B.Code", "A.Level.Name"))
# ────────────────────────────────────────────────────────────────────────────────
# 3. Calculate percentage contribution of each ingredient ----------------------
diet_percentages <- ingredients_clean %>%
filter(D.Type != "Entire Diet") %>% # <-- keep the pipe!
left_join(totals_clean,
by = c("B.Code", "A.Level.Name", "D.Unit.Amount")) %>%
mutate(
Percentage_of_Diet = ifelse(Total_Amount > 0,
(D.Amount / Total_Amount) * 100,
NA_real_)
) %>%
select(
B.Code, A.Level.Name, D.Item, D.Type,
D.Amount, D.Unit.Amount, Percentage_of_Diet
)
# Interactive tables ------------------------------------------------------------
datatable(
totals_clean,
options = list(pageLength = 10, scrollX = TRUE, autoWidth = TRUE),
caption = "Total amount of each diet (grams)"
)
datatable(
diet_percentages,
options = list(pageLength = 10, scrollX = TRUE, autoWidth = TRUE),
caption = "Relative percentage of each ingredient within its diet"
)
# Clean up helper objects if desired
rm(consistent_diets, diet_completeness, total_ingr,
feed_intake_diet_g, unit_check, consistent_unit_diets)
# Check if each diet sums close to 100%
validation <- diet_percentages %>%
group_by(B.Code, A.Level.Name) %>%
summarise(Total_Percentage = sum(Percentage_of_Diet, na.rm=TRUE)) %>%
ungroup()
# Display diets and their total percentages
# datatable(
# validation,
# options = list(
# pageLength = 10, # Show 10 rows per page by default
# scrollX = TRUE, # Enable horizontal scrolling
# autoWidth = TRUE, # Adjust column width automatically
# searchHighlight = TRUE # Highlight search terms
# ),
# caption = "Validation table"
# )
The lines that have 0 as validation are the ones that were feeding ad libitum for which we do not have a precise amount so it’s impossible to calculate the relative proportion of the ingredients.
Enteric methane (CH₄) emissions were estimated using the IPCC Tier 2 methodology, which incorporates animal-specific data such as gross energy intake (GE) and methane conversion factors (Ym). This approach improves accuracy over Tier 1 by reflecting diet composition, feed intake, and species-specific differences.
For GE values reported at the entire diet level, daily gross energy intake per animal was calculated as:
\[ \text{GE}_{\text{daily}} = \text{GE}_{\text{diet}} \times \text{Diet Dry Amount} \]
Where:
To do that we will have ton convert all feed intake data into kg/day/animal.
When GE values were reported at the ingredient level, and information on diet composition (ingredient proportions) was available, the diet-level GE was reconstructed as:
\[ \text{GE}_{\text{diet}} = \sum_{i=1}^{n} (\text{GE}_i \times p_i) \]
Where:
Then, daily intake was computed using the same formula above.
Annual methane emissions were calculated using the IPCC Tier 2 equation:
\[ \text{CH}_4 = \frac{\text{GE}_{\text{daily}} \times \text{Ym} \times 365}{55.65} \]
Where:
1. Table of Diet Composition Proportions
This table defines the proportion of each ingredient in the overall diet:
# Create an interactive table for "total_amounts"
datatable(
diet_percentages,
options = list(
pageLength = 10, # Show 10 rows per page by default
scrollX = TRUE, # Enable horizontal scrolling
autoWidth = TRUE, # Adjust column width automatically
searchHighlight = TRUE # Highlight search terms
),
caption = "Interactive Table: Relative % of Ingredients"
)
Used to compute reconstructed GE_diet
from
ingredient-level GE values:
\[ \text{GE}_{\text{diet}} = \sum_{i=1}^{n} (\text{GE}_i \times p_i) \]
2. Table of Ingredient Dry Amounts
This table stores the amount of each ingredient fed per animal per day, already in or converted to dry matter:
# This 'ingredients' now contains all diets + basal integrated.
# Table 1: Final diet amounts with harmonized units (including basal)
datatable(
ingredients,
options = list(
pageLength = 10, # Show 10 rows per page by default
scrollX = TRUE, # Enable horizontal scrolling
autoWidth = TRUE, # Adjust column width automatically
searchHighlight = TRUE # Highlight search terms
),
caption = "Interactive Table: Diet Ingredients"
)
Useful when GE is per ingredient and diet is not proportionally described.
3. Table of Total Diet Dry Amounts
This is the total daily dry matter offered or consumed per animal, either directly from intake or inferred from ingredient sums:
datatable(
total_amounts,
options = list(
pageLength = 10, # Show 10 rows per page by default
scrollX = TRUE, # Enable horizontal scrolling
autoWidth = TRUE, # Adjust column width automatically
searchHighlight = TRUE # Highlight search terms
),
caption = "Interactive Table: Total Amounts of Diet Ingredients"
)
\[ \text{GE}_{\text{daily}} = \text{GE}_{\text{diet}} \times \text{Diet Dry Amount} \]
# 1. Total papers per species
species_counts <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Total_Studies")
# 2. All papers with GE (non-NA)
GE_data <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable == "GE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
# 4. Combine the two
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
# Display table
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 104 |
Goat | 186 | 162 |
Sheep | 222 | 195 |
# 1. Total papers per species
species_counts <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Total_Studies")
# 2. All papers with GE (non-NA)
GE_data <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable == "GE") %>%
filter(DC.Variable == "GE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
# 4. Combine the two
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
# Display table
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 104 |
Goat | 186 | 162 |
Sheep | 222 | 195 |
# Inspect the units used for GE
Gross_energy <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable == "GE")
Gross_energy$DC.Value<-as.numeric(Gross_energy$DC.Value)
unique(Gross_energy$DC.Unit)
## [1] "g/kg" "MJ/kg"
# Harmonize only GE values and overwrite DC.Value
## ---- harmonise_gross_energy_to_MJkg -----------------------------------------
Gross_energy <- Gross_energy %>%
mutate(
## 1) lower-case + trim once ----------------------------------------------
unit_raw = tolower(trimws(DC.Unit))
) %>%
## 2) convert GE values to MJ/kg --------------------------------------------
mutate(
DC.Value = case_when(
DC.Variable == "GE" ~ case_when(
unit_raw %in% c("mj/kg", "mj/kg dm") ~ DC.Value,
unit_raw %in% c("kcal/kg", "kcal/kg dm") ~ DC.Value * 0.004184,
unit_raw == "kcal/g" ~ DC.Value * 4.184,
unit_raw == "kcal/100g" ~ DC.Value * 0.04184, # 1 kcal = 0.004184 MJ, ×10 (100 g→kg)
unit_raw == "cal/g" ~ DC.Value * 0.004184, # 1 cal g⁻¹ → 0.004184 MJ kg⁻¹
unit_raw == "mcal/kg" ~ DC.Value * 4.184,
unit_raw %in% c("kj/kg", "kj/kg dm") ~ DC.Value / 1000,
unit_raw == "kj/g" ~ DC.Value, # already MJ kg⁻¹ (kJ g⁻¹ ×1000)
unit_raw %in% c("kj/100g", "kj/100g dm", "kj/100 g dm") ~ DC.Value * 0.01,
unit_raw == "mj/100g" ~ DC.Value * 10,
unit_raw == "j/mg" ~ DC.Value, # J mg⁻¹ → MJ kg⁻¹ (10⁻⁶ / 10⁻⁶)
TRUE ~ NA_real_
),
TRUE ~ DC.Value # non-GE rows stay intact
),
## 3) update unit label ----------------------------------------------------
DC.Unit = if_else(DC.Variable == "GE" & !is.na(DC.Value), "MJ/kg", unit_raw)
) %>%
## 4) tidy up ---------------------------------------------------------------
select(-unit_raw)
unique(Gross_energy$DC.Unit)
## [1] "g/kg" "MJ/kg"
Gross_energy <- Gross_energy %>%
# STEP 1: Create GE_source from nutrition_source
mutate(
GE_source = case_when(
nutrition_source == "feedipedia" ~ "feedipedia",
nutrition_source == "inferred_from_same_ingredient" ~ "inferred_from_same_ingredient",
nutrition_source == "raw data" ~ "raw data",
TRUE ~ NA_character_
)
)%>%
select(-"nutrition_source")
g/kg ❌ Not an energy unit Likely a mistake, investigate or exclude % / % DM / % as fed ❌ Not energy units Same as above Unspecified ❌ Ambiguous Investigate or exclude
# 1. Total papers per species
species_counts <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Total_Studies")
# 2. All papers with GE (non-NA)
GE_data <- Gross_energy %>%
filter(DC.Variable == "GE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
# 4. Combine the two
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
# Display table
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 89 |
Goat | 186 | 146 |
Sheep | 222 | 178 |
Gross_energy_check <- Gross_energy %>%
filter(!is.na(DC.Value))%>%
filter(GE_source=="raw data")
# ── 1 · Identify the non-energy ingredients ───────────────────────────────────
nonenergy_types <- c("Non-ERA Feed", "Supplement") # ← NEW: add more if needed
# ── 1 · Look-up table of those items ─────────────────────────────────────────
nonenergy_lookup <- merged_metadata$Animals.Diet %>% # or use `ingredients`
filter(D.Type %in% nonenergy_types) %>% # ← NEW: %in% the vector
distinct(B.Code, D.Item)
# ── 2 · Make sure every (paper × item) has a GE row ───────────────────────────
missing_nonera_ge <- nonenergy_lookup %>%
anti_join( # keep only those
Gross_energy %>% filter(DC.Variable == "GE") %>% # already in GE table
select(B.Code, D.Item),
by = c("B.Code", "D.Item")
) %>%
mutate(
DC.Variable = "GE",
DC.Value = 0,
DC.Unit = "MJ/kg",
GE_source = "assumed_zero"
)
# ── 3 · Bind and override (value, unit, source) for every Non-ERA row ─────────
Gross_energy <- Gross_energy %>%
# add the rows that were completely missing
bind_rows(missing_nonera_ge) %>%
# tag which rows are Non-ERA so we can overwrite them
left_join(nonenergy_lookup %>% mutate(flag_nonera = TRUE),
by = c("B.Code", "D.Item")) %>%
mutate(
DC.Value = if_else(DC.Variable == "GE" & flag_nonera, 0, DC.Value),
DC.Unit = if_else(DC.Variable == "GE" & flag_nonera, "MJ/kg", DC.Unit),
GE_source = if_else(DC.Variable == "GE" & flag_nonera, "assumed_zero", GE_source)
) %>%
select(-flag_nonera) # tidy up
To estimate Gross Energy (GE) when direct measurements are not available, we use a predictive equation based on proximate nutritional components: crude protein (CP), ether extract (EE), and ash. This method provides an accurate approximation of the energy content of feed ingredients based on their macronutrient composition.
The equation used is adapted from Weiss et al. (2019) and is expressed as:
GE (Mcal/kg) = (0.056 × CP) + (0.094 × EE) + [0.042 × (100 − CP − EE − Ash)]
GE (MJ/kg) = GE (Mcal/kg) × 4.184
# --- STEP 1: Prepare predictors (CP, EE, Ash) as % of DM ---
# Keep relevant metadata for later
metadata_cols <- merged_metadata$Animals.Diet.Comp %>%
select(B.Code, D.Item, is_entire_diet, Feedipedia) %>%
distinct()
ge_predictors <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable %in% c("CP", "EE", "Ash")) %>%
mutate(
DC.Unit = trimws(tolower(DC.Unit)),
DC.Value = as.numeric(DC.Value),
# Harmonize to % of DM
DC.Value = case_when(
DC.Unit %in% c("%", "% dm", "% as fed", "g/100g", "g/100g dm") ~ DC.Value,
DC.Unit %in% c("g", "g/kg", "g/kg dm") ~ DC.Value / 10,
DC.Unit %in% c("kg", "kg/day") ~ DC.Value * 100,
TRUE ~ NA_real_
),
DC.Unit = "%"
) %>%
group_by(B.Code, D.Item, DC.Variable) %>%
summarise(DC.Value = mean(DC.Value, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = DC.Variable, values_from = DC.Value) %>%
left_join(metadata_cols, by = c("B.Code", "D.Item"))
# --- STEP 2: Apply Weiss GE equation only where CP, EE, Ash are all present ---
ge_predicted <- ge_predictors %>%
mutate(
GE_Mcal_per_kg = ifelse(
!is.na(CP) & !is.na(EE) & !is.na(Ash),
(CP * 0.056) + (EE * 0.094) + ((100 - CP - EE - Ash) * 0.042),
NA_real_
),
GE_MJ_per_kg = GE_Mcal_per_kg * 4.184
) %>%
select(B.Code, D.Item, GE_MJ_per_kg, is_entire_diet, Feedipedia)
# --- STEP 3: Merge and only impute where GE is missing ---
Gross_energy <- Gross_energy %>%
# STEP 2: Join GE predictions (e.g. from Weiss)
full_join(ge_predicted, by = c("B.Code", "D.Item","is_entire_diet","Feedipedia")) %>%
# STEP 3: Only update GE_source if it is still missing
mutate(
GE_source = case_when(
is.na(GE_source) & !is.na(GE_MJ_per_kg) & is.na(DC.Value) ~ "predicted_from_Weiss_GE",
TRUE ~ GE_source
),
DC.Value = if_else(is.na(DC.Value), GE_MJ_per_kg, DC.Value),
DC.Unit = "MJ/kg",
DC.Variable = "GE"
) %>%
select(-GE_MJ_per_kg)
# 1. Total papers per species
species_counts <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Total_Studies")
# 2. All papers with GE (non-NA)
GE_data <- Gross_energy %>%
filter(DC.Variable == "GE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
# 4. Combine the two
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
# Display table
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 119 |
Goat | 186 | 179 |
Sheep | 222 | 217 |
As we did in 9) we are inferring again GE data after estimation by proximal nutritional composition.
ingredient_means <- Gross_energy %>%
filter(is_entire_diet == FALSE, !is.na(DC.Value)) %>%
group_by(D.Item, DC.Variable) %>%
summarise(mean_value = mean(DC.Value, na.rm = TRUE), .groups = "drop")
# --- Fill Missing Values with Mean ---
Gross_energy <- Gross_energy %>%
left_join(ingredient_means, by = c("D.Item", "DC.Variable")) %>%
mutate(
GE_source = case_when(
is.na(DC.Value) & !is_entire_diet & !is.na(mean_value) ~ "inferred_from_same_ingredient-Step2",
TRUE ~ GE_source
),
DC.Value = if_else(
is.na(DC.Value) & !is_entire_diet & !is.na(mean_value),
mean_value,
DC.Value
)
) %>%
select(-mean_value)
# 1. Total papers per species
species_counts <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Total_Studies")
# 2. All papers with GE (non-NA)
GE_data <- Gross_energy %>%
filter(DC.Variable == "GE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
# 4. Combine the two
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
# Display table
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 119 |
Goat | 186 | 179 |
Sheep | 222 | 217 |
# --- STEP 4: Prepare predictors for Weiss DE equation ---
# --- STEP 1: Store relevant metadata before pivot ---
de_metadata_cols <- merged_metadata$Animals.Diet.Comp %>%
select(B.Code, D.Item, is_entire_diet, Feedipedia) %>%
distinct()
# --- STEP 2: Prepare DE predictors and harmonize units ---
de_predictors <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable %in% c("CP", "EE", "Ash", "NDF")) %>%
mutate(
DC.Unit = trimws(tolower(DC.Unit)),
DC.Value = as.numeric(DC.Value),
# Harmonize to g/kg DM
DC.Value = case_when(
DC.Unit %in% c("%", "% dm", "% as fed", "g/100g", "g/100g dm") ~ DC.Value * 10,
DC.Unit %in% c("g", "g/kg", "g/kg dm") ~ DC.Value,
DC.Unit %in% c("kg", "kg/day") ~ DC.Value * 1000,
TRUE ~ NA_real_
),
DC.Unit = "g/kg"
) %>%
group_by(B.Code, D.Item, DC.Variable) %>%
summarise(DC.Value = mean(DC.Value, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = DC.Variable, values_from = DC.Value) %>%
left_join(de_metadata_cols, by = c("B.Code", "D.Item"))
# --- STEP 3: Apply Weiss DE equation and convert to GE ---
de_predicted <- de_predictors %>%
mutate(
DE_Mcal_per_kg = ifelse(
!is.na(CP) & !is.na(EE) & !is.na(Ash) & !is.na(NDF),
1.044 + 0.0101 * CP + 0.0161 * EE - 0.0037 * Ash - 0.0023 * NDF,
NA_real_
),
DE_MJ_per_kg = DE_Mcal_per_kg * 4.184,
GE_MJ_per_kg = ifelse(!is.na(DE_MJ_per_kg), DE_MJ_per_kg / 0.54, NA_real_)
) %>%
select(B.Code, D.Item, GE_MJ_per_kg, is_entire_diet, Feedipedia)
# --- STEP 6: Fill remaining missing GE using DE-based predictions only if GE is still missing ---
Gross_energy <- full_join(Gross_energy, de_predicted, by = c("B.Code", "D.Item", "is_entire_diet", "Feedipedia")) %>%
mutate(
DC.Value = if_else(is.na(DC.Value) & !is.na(GE_MJ_per_kg), GE_MJ_per_kg, DC.Value),
GE_source = case_when(
is.na(GE_source) & is.na(DC.Value) & !is.na(GE_MJ_per_kg) ~ "predicted_from_Weiss_DE",
TRUE ~ GE_source # keep existing label (e.g., feedipedia, raw data, inferred, or missing)
),
DC.Unit = "MJ/kg",
DC.Variable = "GE"
) %>%
select(-GE_MJ_per_kg)
GE_data <- Gross_energy %>%
filter(DC.Variable == "GE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 119 |
Goat | 186 | 179 |
Sheep | 222 | 217 |
We dont get more papers seems logical because probably the same ingredient as the equation for GE.
Converts digestible energy (DE) to gross energy (GE) using a default digestibility factor (54%) when DE is reported but GE is not.
# 1. Total papers per species
species_counts <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Total_Studies")
# 2. All papers with GE (non-NA)
DE_data <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable == "DE", !is.na(DC.Value) & DC.Value != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
DE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(DE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_DE")
# 4. Combine the two
summary_table <- species_counts %>%
full_join(DE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
# Display table
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_DE |
---|---|---|
Cattle | 127 | 0 |
Goat | 186 | 2 |
Sheep | 222 | 2 |
Most of the DE data is coming from feediepdia which is expressed as a % of DE so we can’t convert that data back using the IPCC method.
# # Define digestibility conversion factor (Song et al., 2025)
# default_DE_fraction <- 0.54 # 54%
#
# # 1. Extract DE rows from merged_metadata
# de_data <- merged_metadata$Animals.Diet.Digest %>%
# filter(DD.Variable == "DE", !is.na(DD.Value) & DD.Value != "") %>%
# mutate(
# DD.Value = as.numeric(DD.Value),
# DD.Unit = trimws(tolower(DD.Unit)),
# DD.Value = case_when(
# DD.Unit %in% c("mj/kg", "mj/kg dm") ~ DD.Value,
# DD.Unit == "mcal/kg" ~ DD.Value * 4.184,
# DD.Unit == "kcal/kg" ~ DD.Value / 239,
# DD.Unit == "kcal/kg dm" ~ DD.Value / 239,
# DD.Unit == "kcal/100g" ~ (DD.Value * 10) / 239,
# DD.Unit == "kj/g" ~ DD.Value / 1000,
# DD.Unit == "kj/100g" ~ DD.Value / 100,
# TRUE ~ NA_real_
# ),
# DD.Unit = "MJ/kg"
# ) %>%
# mutate(
# GE_from_DE = DD.Value / default_DE_fraction
# ) %>%
# select(B.Code, D.Item, GE_from_DE)
#
# # 2. Merge GE_from_DE into your existing Gross_energy dataset
# Gross_energy <- Gross_energy %>%
# left_join(de_data, by = c("B.Code", "D.Item")) %>%
# mutate(
# # Flag only when GE was previously missing and DE is available
# GE_source = case_when(
# is.na(GE_source) & is.na(DC.Value) & !is.na(GE_from_DE) ~ "inferred_from_DE",
# TRUE ~ GE_source
# ),
# # Fill in missing GE values with GE_from_DE
# DC.Value = if_else(is.na(DC.Value), GE_from_DE, DC.Value),
# DC.Unit = "MJ/kg",
# DC.Variable = "GE"
# ) %>%
# select(-GE_from_DE)
# 1. Total studies per species
species_total <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
distinct(B.Code, P.Product)
# 2. GE: Only B.Codes where DC.Variable == GE and DC.Value is NOT NA
ge_bcodes <- Gross_energy %>%
filter(DC.Variable == "GE", !is.na(DC.Value)) %>%
distinct(B.Code) %>%
pull()
# 3. Feed: Only B.Codes where D.Amount is NOT NA
feed_amount_bcodes <- ingredients %>%
filter(!is.na(D.Amount)) %>%
distinct(B.Code) %>%
pull()
# 4. B.Codes with BOTH GE value AND feed amount
both_bcodes <- intersect(ge_bcodes, feed_amount_bcodes)
# 5. Build final summary table by species
final_summary <- species_total %>%
mutate(
Has_GE = B.Code %in% ge_bcodes,
Has_FEED_or_Amount = B.Code %in% feed_amount_bcodes,
Has_BOTH = B.Code %in% both_bcodes
) %>%
group_by(P.Product) %>%
summarise(
Total_Studies = n_distinct(B.Code),
Studies_with_GE = sum(Has_GE),
Studies_with_FEED_or_Amount = sum(Has_FEED_or_Amount),
Studies_with_BOTH = sum(Has_BOTH)
) %>%
arrange(P.Product)
# Show summary
knitr::kable(final_summary, caption = "Final Summary (with actual GE and Feed Amount values)")
P.Product | Total_Studies | Studies_with_GE | Studies_with_FEED_or_Amount | Studies_with_BOTH |
---|---|---|---|---|
Cattle | 127 | 119 | 63 | 62 |
Goat | 186 | 179 | 105 | 102 |
Sheep | 222 | 217 | 147 | 145 |
# Optional: List the actual B.Codes with both
bcode_list_both <- species_total %>%
filter(B.Code %in% both_bcodes) %>%
arrange(P.Product, B.Code)
# Show the B.Codes table
knitr::kable(bcode_list_both, caption = "B.Codes with BOTH GE (DC.Value) and Feed Intake (D.Amount)")
B.Code | P.Product |
---|---|
AG0018 | Cattle |
BO1033 | Cattle |
BO1038 | Cattle |
BO1040 | Cattle |
BO1054 | Cattle |
BO1055 | Cattle |
BO1070 | Cattle |
BO1072 | Cattle |
BO1080 | Cattle |
CJ1005 | Cattle |
CJ1006 | Cattle |
EM1022 | Cattle |
EM1047 | Cattle |
EM1050 | Cattle |
EM1079 | Cattle |
EM1084 | Cattle |
EM1101 | Cattle |
HK0086 | Cattle |
HK0138 | Cattle |
HK0270 | Cattle |
JO1004 | Cattle |
JO1045 | Cattle |
JO1046 | Cattle |
JO1050 | Cattle |
JO1053 | Cattle |
JO1091 | Cattle |
JS0176 | Cattle |
JS0205 | Cattle |
JS0213.1 | Cattle |
JS0214 | Cattle |
JS0215.1 | Cattle |
JS0215.2 | Cattle |
JS0269.2 | Cattle |
JS0324.1 | Cattle |
JS0324.2 | Cattle |
LM0101 | Cattle |
LM0115 | Cattle |
LM0117.1 | Cattle |
LM0117.2 | Cattle |
LM0117.3 | Cattle |
LM0123 | Cattle |
LM0124 | Cattle |
LM0126 | Cattle |
LM0151.1 | Cattle |
LM0151.2 | Cattle |
LM0227.1 | Cattle |
LM0227.2 | Cattle |
LM0228.1 | Cattle |
LM0228.2 | Cattle |
LM0229 | Cattle |
NJ0016 | Cattle |
NJ0025 | Cattle |
NN0005 | Cattle |
NN0188 | Cattle |
NN0207.2 | Cattle |
NN0216 | Cattle |
NN0227 | Cattle |
NN0253 | Cattle |
NN0255 | Cattle |
NN0280.1 | Cattle |
NN0299 | Cattle |
NN0315 | Cattle |
AG0042 | Goat |
AN0057 | Goat |
BO1002 | Goat |
BO1003 | Goat |
BO1005 | Goat |
BO1009 | Goat |
BO1019 | Goat |
BO1026 | Goat |
BO1029 | Goat |
BO1037 | Goat |
BO1042 | Goat |
BO1046 | Goat |
BO1065 | Goat |
BO1068 | Goat |
BO1074 | Goat |
BO1076 | Goat |
BO1079 | Goat |
BO1083 | Goat |
BO1088 | Goat |
BO1091 | Goat |
BO1093 | Goat |
BO1098 | Goat |
BO1099 | Goat |
CJ1002 | Goat |
CJ1014 | Goat |
EM1005 | Goat |
EM1011 | Goat |
EM1014 | Goat |
EM1025 | Goat |
EM1028 | Goat |
EM1030 | Goat |
EM1045 | Goat |
EM1052 | Goat |
EM1053 | Goat |
EM1056 | Goat |
EM1072 | Goat |
EM1073 | Goat |
EM1074 | Goat |
EM1077 | Goat |
EM1078 | Goat |
EM1100 | Goat |
EM1107 | Goat |
EO0078 | Goat |
HK0029 | Goat |
HK0107 | Goat |
HK0108 | Goat |
HK0143 | Goat |
HK0171 | Goat |
JO1003 | Goat |
JO1005 | Goat |
JO1010 | Goat |
JO1023 | Goat |
JO1031 | Goat |
JO1034 | Goat |
JO1039.2 | Goat |
JO1044 | Goat |
JO1048 | Goat |
JO1051 | Goat |
JO1060 | Goat |
JO1079 | Goat |
JO1080 | Goat |
JO1081 | Goat |
JO1087 | Goat |
JO1099 | Goat |
JO1100 | Goat |
JS0048 | Goat |
JS0095 | Goat |
JS0115 | Goat |
JS0118 | Goat |
JS0119 | Goat |
JS0172 | Goat |
JS0192 | Goat |
JS0197 | Goat |
JS0206 | Goat |
JS0211 | Goat |
JS0243 | Goat |
JS0261 | Goat |
JS0289.2 | Goat |
JS0292 | Goat |
JS0349 | Goat |
JS0424 | Goat |
LM0103 | Goat |
LM0163 | Goat |
LM0248 | Goat |
LM0266 | Goat |
LM0280 | Goat |
LM0322 | Goat |
LM0329 | Goat |
LM0349 | Goat |
NJ0013 | Goat |
NN0092.2 | Goat |
NN0211 | Goat |
NN0229 | Goat |
NN0236 | Goat |
NN0251 | Goat |
NN0272.1 | Goat |
NN0272.1.2 | Goat |
NN0276.2 | Goat |
NN0361 | Goat |
NN0362 | Goat |
NN0374 | Goat |
NN0375 | Goat |
AN0007 | Sheep |
AN0021 | Sheep |
AN0029 | Sheep |
BO1000 | Sheep |
BO1004 | Sheep |
BO1006 | Sheep |
BO1008 | Sheep |
BO1011 | Sheep |
BO1017 | Sheep |
BO1020 | Sheep |
BO1021 | Sheep |
BO1024 | Sheep |
BO1025 | Sheep |
BO1027 | Sheep |
BO1032 | Sheep |
BO1034 | Sheep |
BO1036 | Sheep |
BO1046 | Sheep |
BO1048 | Sheep |
BO1049 | Sheep |
BO1050 | Sheep |
BO1051 | Sheep |
BO1059 | Sheep |
BO1070 | Sheep |
BO1082 | Sheep |
BO1085 | Sheep |
BO1086 | Sheep |
BO1087 | Sheep |
BO1096 | Sheep |
CJ1001 | Sheep |
CJ1004 | Sheep |
CJ1015 | Sheep |
EM1017 | Sheep |
EM1020 | Sheep |
EM1032 | Sheep |
EM1034 | Sheep |
EM1037 | Sheep |
EM1038 | Sheep |
EM1039 | Sheep |
EM1046 | Sheep |
EM1048 | Sheep |
EM1075 | Sheep |
EM1076 | Sheep |
EM1081 | Sheep |
EM1087 | Sheep |
EM1090 | Sheep |
EM1094 | Sheep |
EM1102 | Sheep |
EM1104 | Sheep |
EO0046 | Sheep |
EO0074 | Sheep |
HK0011 | Sheep |
HK0033 | Sheep |
HK0154 | Sheep |
HK0260 | Sheep |
HK0329 | Sheep |
JO1013 | Sheep |
JO1015 | Sheep |
JO1020 | Sheep |
JO1022 | Sheep |
JO1024 | Sheep |
JO1027 | Sheep |
JO1028 | Sheep |
JO1030 | Sheep |
JO1035 | Sheep |
JO1036 | Sheep |
JO1038 | Sheep |
JO1039.1 | Sheep |
JO1039.3 | Sheep |
JO1039.5a | Sheep |
JO1041 | Sheep |
JO1042 | Sheep |
JO1052 | Sheep |
JO1057 | Sheep |
JO1059 | Sheep |
JO1064 | Sheep |
JO1082 | Sheep |
JO1084 | Sheep |
JO1085 | Sheep |
JO1086 | Sheep |
JO1094 | Sheep |
JS0046 | Sheep |
JS0140 | Sheep |
JS0147 | Sheep |
JS0207 | Sheep |
JS0209 | Sheep |
JS0210.2 | Sheep |
JS0249 | Sheep |
JS0262 | Sheep |
JS0263 | Sheep |
JS0265 | Sheep |
JS0271 | Sheep |
JS0272 | Sheep |
JS0273 | Sheep |
JS0281 | Sheep |
JS0285 | Sheep |
JS0289.1 | Sheep |
JS0290 | Sheep |
JS0291 | Sheep |
JS0301 | Sheep |
JS0311 | Sheep |
JS0314 | Sheep |
JS0344 | Sheep |
JS0419 | Sheep |
LM0018 | Sheep |
LM0020 | Sheep |
LM0104 | Sheep |
LM0114 | Sheep |
LM0139 | Sheep |
LM0289 | Sheep |
LM0295 | Sheep |
LM0297 | Sheep |
LM0298 | Sheep |
LM0299 | Sheep |
LM0311 | Sheep |
LM0336 | Sheep |
LM0339 | Sheep |
NJ0008 | Sheep |
NJ0032 | Sheep |
NJ0044 | Sheep |
NN0007 | Sheep |
NN0021 | Sheep |
NN0022 | Sheep |
NN0106 | Sheep |
NN0179 | Sheep |
NN0180.1 | Sheep |
NN0189 | Sheep |
NN0196 | Sheep |
NN0197 | Sheep |
NN0200 | Sheep |
NN0201 | Sheep |
NN0202 | Sheep |
NN0203 | Sheep |
NN0204 | Sheep |
NN0205 | Sheep |
NN0207.1 | Sheep |
NN0212 | Sheep |
NN0214 | Sheep |
NN0217 | Sheep |
NN0224 | Sheep |
NN0234 | Sheep |
NN0316 | Sheep |
NN0329 | Sheep |
NN0345 | Sheep |
NN0377 | Sheep |
Category | Description | Organic Matter (% DM) | Ash (% DM) | Protein (% DM) | NDF (% DM) | ADF (% DM) | ADL (% DM) | Starch (% DM) | Ether Extract (% DM) | Gross Energy (MJ kg-¹ DM) | Metabolizable Energy (MJ kg-¹ DM) | DM digestibility | Feeds’ examples (incl. industrial by-products) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
I | High-digestible energy feeds (non-fibre carbohydrate sources: starch & sugars) | ~96–98 | ~2–4 | 8–12 | Low (10–20) | Low (4–8) | Very low (0.5–2) | High (50–75) | Low (2–4) | ~17–19 | 12–14 | Very high (>80 %) | Maize grain, barley, wheat, wheat bran, rice bran, hominy feed, molasses, beet pulp, citrus pulp |
II | High-digestible energy feeds (fat/oil sources) | >99 | <1 | <5 | Very low (<5) | Very low (<2) | Negligible (<0.5) | Negligible (<1) | Very high (80–99) | ~30–38 | 20–25 | Very high (>90 %) | Vegetable oils (and residues), tallow, animal fats, protected fats |
III | High-protein feeds | ~92–95 | ~5–8 | 35–50 | Low–moderate (10–30) | Low–moderate (6–10) | Low (1–3) | Low (<5) | Low–moderate (2–6) | ~18–20 | 11–13 | Very high (>75 %) | Soybean meal, canola meal, cottonseed meal, sunflower meal, distillers’ grains, fish meal, meat & bone meal, blood meal |
IV | Good-quality forages & moderately digestible by-products | ~90–93 | ~7–10 | 12–20 | Moderate (40–50) | Moderate (25–35) | Moderate (3–6) | Low (1–5) | Low (1–3) | ~16–18 | 8–11 | Moderate (55–70 %) | Young grass hay, good-quality silages, dried beet tops, peanut hulls, soybean hulls |
V | Poor-quality roughages & fibrous by-products | ~90–94 | ~6–10 | 3–8 | High (60–75) | High (40–50) | High (6–10) | Very low (<2) | Very low (<2) | ~15–17 | 5–8 | Low (<50 %) | Mature pasture, straw, bagasse, maize stover, rice hulls, peanut shells, cottonseed hulls |
VI | Mineral/vitamin supplements | ~50 (variable) | ~50–95 | Negligible | Negligible | Negligible | Negligible | Negligible | Negligible | Negligible | Negligible | Highly digestible (N/A) | Mineral premixes, vitamin premixes, salt licks, bone meal, mineralised animal by-products |
###############################################################################
## Draw updated feed‑classification decision tree (DiagrammeR)
###############################################################################
# install.packages("DiagrammeR") # run once if not installed
library(DiagrammeR)
## Warning: package 'DiagrammeR' was built under R version 4.4.3
grViz("
digraph feed_tree {
graph [rankdir = TB, fontsize = 10, fontname = Helvetica]
node [shape = rectangle, style = filled, fillcolor = GhostWhite,
fontname = Helvetica, fontsize = 9]
start [label = 'Start\\n(NDF, CP, Ash, EE, GE)']
VI_q [label = 'Ash > 500 g/kg ?']
II_q [label = 'EE ≥ 300 g/kg\\nAND GE > 25 MJ/kg ?']
III_q [label = 'CP ≥ 300 g/kg ?']
V_q [label = 'NDF ≥ 600 g/kg ?']
IV_q [label = '400 ≤ NDF < 600 ?']
I_chk [label = 'Low fibre (<400) &\\nCP<300 & EE<300 & Ash≤500 &\\nGE 15–25 (if known) ?',
fontsize = 8]
VI [label = 'VI | Mineral / vitamin', fillcolor = LemonChiffon]
II [label = 'II | Fat / oil energy', fillcolor = LemonChiffon]
III [label = 'III | High‑protein', fillcolor = LemonChiffon]
V [label = 'V | Poor roughage', fillcolor = LemonChiffon]
IV [label = 'IV | Good forage /\\nmoderate by‑product', fillcolor = LemonChiffon]
I [label = 'I | Starch / sugar energy', fillcolor = LemonChiffon]
NAx [label = 'NA | Unclassified', fillcolor = Tomato, fontcolor = white]
start -> VI_q
VI_q -> VI [label = 'Yes']
VI_q -> II_q [label = 'No']
II_q -> II [label = 'Yes']
II_q -> III_q [label = 'No']
III_q -> III [label = 'Yes']
III_q -> V_q [label = 'No']
V_q -> V [label = 'Yes']
V_q -> IV_q [label = 'No']
IV_q -> IV [label = 'Yes']
IV_q -> I_chk [label = 'No']
I_chk -> I [label = 'Yes']
I_chk -> NAx [label = 'No']
}
")
###############################################################################
################################################################################
## 17‑new) Ingredient‑level feed‑class assignment
## • averages all rows of an ingredient first
## • NDF / CP / Ash / EE in g kg‑¹ DM ; GE in MJ kg‑¹ DM
################################################################################
library(dplyr)
library(tidyr)
vars_needed <- c("NDF", "CP", "Ash", "EE", "GE")
# ── 1 · Build one mean row per ingredient ────────────────────────────────────
nutr_wide <- merged_metadata$Animals.Diet.Comp %>%
filter(DC.Variable %in% vars_needed, !is_entire_diet) %>%
mutate(DC.Value = as.numeric(DC.Value)) %>%
group_by(D.Item, DC.Variable) %>% # ← only D.Item, no B.Code
summarise(val = mean(DC.Value, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = DC.Variable, values_from = val)
# ── 2 · Explicit six‑class decision rules ------------------------------------
classify_feed_gkg <- function(NDF, CP, Ash, EE, GE){
ok <- function(x) !is.na(x)
nOK <- sum(ok(NDF), ok(CP), ok(Ash), ok(EE), ok(GE))
if(ok(Ash) && Ash > 500) return("VI") # mineral
if(ok(EE) && EE >= 300 && ok(GE) && GE > 25) return("II") # fat/oil
if(ok(CP) && CP >= 300) return("III") # protein
if(ok(NDF) && NDF >= 600) return("V") # poor roughage
if(ok(NDF) && NDF >= 400 && NDF < 600) return("IV") # good forage
if(nOK > 0 &&
(!ok(NDF) || NDF < 400) &&
(!ok(CP) || CP < 300) &&
(!ok(EE) || EE < 300) &&
(!ok(Ash) || Ash <= 500) &&
(!ok(GE) || (GE >= 15 & GE <= 25))) return("I") # starch/sugar
NA_character_ # unclassified
}
# ── 3 · Apply the classifier --------------------------------------------------
nutr_wide <- nutr_wide %>%
rowwise() %>%
mutate(Category = classify_feed_gkg(NDF, CP, Ash, EE, GE)) %>%
ungroup()
# ── 4 · Carry the ingredient‑level Category back into your data --------------
# (join only on D.Item because the class is the same for every paper)
merged_metadata$Animals.Diet.Comp <- merged_metadata$Animals.Diet.Comp %>%
left_join(nutr_wide %>% select(D.Item, Category), by = "D.Item")
if (exists("ingredients")) {
ingredients <- ingredients %>%
left_join(nutr_wide %>% select(D.Item, Category), by = "D.Item")
}
# ── 5 · Quick audit ----------------------------------------------------------
table(nutr_wide$Category, useNA = "ifany")
##
## I II III IV V <NA>
## 597 9 138 273 264 646
################################################################################
supplements
## A · one authoritative lookup ------------------------------------------------
type_lookup <- merged_metadata$Animals.Diet %>%
distinct(B.Code, D.Item, D.Type)
vi_types <- c("Non-ERA Feed", "Supplement")
vi_regex <- regex("supplement", ignore_case = TRUE)
set_vi <- function(df){
df2 <- df
# add D.Type if it isn't there yet
if(!"D.Type" %in% names(df2)){
df2 <- df2 %>% left_join(type_lookup, by = c("B.Code", "D.Item"))
}
# if D.Type still missing, just leave Category as-is
if(!"D.Type" %in% names(df2)){
warning("D.Type still absent after join – Category unchanged for this table")
return(df2)
}
df2 %>%
mutate(
Category = if_else(
!is.na(D.Type) &
(D.Type %in% vi_types | str_detect(D.Type, vi_regex)),
"VI",
Category
)
)
}
## B · run it on both tables ---------------------------------------------------
merged_metadata$Animals.Diet.Comp <- set_vi(merged_metadata$Animals.Diet.Comp)
## Warning in left_join(., type_lookup, by = c("B.Code", "D.Item")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 279 of `x` matches multiple rows in `y`.
## ℹ Row 5198 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
if (exists("ingredients")) {
ingredients <- set_vi(ingredients)
}
## optional sanity check
table(merged_metadata$Animals.Diet.Comp$Category, useNA = "ifany")
##
## I II III IV V VI <NA>
## 79495 704 47565 42525 60043 73472 217534
###############################################################################
## A · rebuild / refresh Category column in nutr_wide --------------------------
nutr_wide <- nutr_wide %>%
select(-Category) %>% # drop any old class
left_join(
merged_metadata$Animals.Diet.Comp %>%
distinct(D.Item, Category), # <- no B.Code here
by = "D.Item"
)
## B · 1) overall share of rows that now have a class --------------------------
overall <- nutr_wide %>%
summarise(
Total_rows = n(),
Assigned_rows = sum(!is.na(Category)),
`% assigned` = round(100 * Assigned_rows / Total_rows, 1)
)
kable(overall, caption = "Overall coverage of the Category field")
Total_rows | Assigned_rows | % assigned |
---|---|---|
1958 | 1392 | 71.1 |
## C · 2) number of **unique** ingredients per category ------------------------
uniq_per_cat <- nutr_wide %>%
filter(!is.na(Category)) %>%
group_by(Category) %>%
summarise(
Unique_ingredients = n_distinct(D.Item),
Rows = n(), # row count if you still want it
.groups = "drop"
) %>%
arrange(Category)
kable(uniq_per_cat, caption = "Unique ingredients by category")
Category | Unique_ingredients | Rows |
---|---|---|
I | 594 | 594 |
II | 8 | 8 |
III | 135 | 135 |
IV | 273 | 273 |
V | 263 | 263 |
VI | 119 | 119 |
## D · 3) ten *random* ingredients in each category ---------------------------
## D · 3) ten *random* ingredients in each category ---------------------------
set.seed(222) # remove or change for a different random draw
rand10 <- nutr_wide %>%
filter(!is.na(Category)) %>%
group_by(Category) %>%
slice_sample(prop = 1) %>% # randomise the whole group
slice_head(n = 10) %>% # keep up to 10 rows
mutate(rank = row_number()) %>%
ungroup()
rand10_wide <- rand10 %>%
select(Category, rank, D.Item) %>%
tidyr::pivot_wider(names_from = rank, values_from = D.Item)
kable(rand10_wide,
caption = "Ten random ingredients in every category")
Category | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
---|---|---|---|---|---|---|---|---|---|---|
I | Gamba grass | 50% starter | Hominy | Sweet Potato Peels | Grower Diet 1 | Cotton Seed Cake||Milling;Undecorticated | Calliandra calothyrsus Leaves Dried(Air,Shade) Ground | Finisher | Pithecellobium dulce Leaves Dried(Sun) | Grewia similis Dried |
II | Sesame Seed Dried Ground Soaked | Sunflower Seed | Sunflower Seed Ground | Sesbania sesban Seed | Sesame Seed Ground | Sesame Seed | Sesame Seed Cake Autoclaved Dried Ground | Sunflower Ground | NA | NA |
III | Corn Steep Liquor | Soybean Meal | Winged Bean Meal Pelleted | Sun-dried Fish Meal | Soybean||Milling | Termite Dried Ground | Cotton Seed Cake Ground | Sesbania sesban Leaves Dried(Air) | Linseed Cake | Canola Cake |
IV | Oat||Drying | Pasture | Ficus exasperata | Maize||Chopping||Ensilation | Wheat||Milling | Tribulus terrestris | Euphorbia hirta | Poultry Litter Deep Stacked Dried | Groundnut Haulm | Cocoa Bean Shell Dried Ground |
V | Grass Dried | HSB silage | Unspecified Grass | Native Grass||Chopping||Drying | Centrosema pubescens Leaves | Andropogon | Forest Pasture | Pennisetum purpureum Wild Chopped | Trifolium alexandrinum Straw||Urea Treatment | 3% Urea treated maize stover with 5% molasses |
VI | Monocalcium Phosphate | Kanwa-Potassium Salt | Elancoban | Minerals and Vitamins Premix | Avatec | Free Gossypol | Calcium Oxide | Mineral Lick | Binder | Bone Ash |
###############################################################################
## E · Ingredients that remain unclassified (Category == NA)
###############################################################################
###############################################################################
## E · Ingredients still unclassified, counted by unique study
## (“one vote per B.Code”)
###############################################################################
unclass_freq <- merged_metadata$Animals.Diet.Comp %>%
filter(is.na(Category)) %>%
filter(is_entire_diet=="FALSE")%>%# keep only the NA‑class rows
distinct(D.Item, B.Code) %>% # one row per ingredient *per study*
count(D.Item, sort = TRUE, name = "Studies") # how many studies
kable(head(unclass_freq, 20),
caption = "Top 20 unclassified ingredients (ranked by # of studies)")
D.Item | Studies |
---|---|
Water | 232 |
Salt | 173 |
Concentrate | 58 |
Wheat Offal | 46 |
Maize Meal | 35 |
Soybean Oil | 21 |
Blood Meal | 17 |
Vegetable Oil | 17 |
Palm Oil | 16 |
Cod Liver Oil | 12 |
Rice Offal | 12 |
Lucerne | 11 |
Oyster Shells | 11 |
Sunflower Oil | 11 |
Maize Oil | 10 |
Unspecified Molasses Meal | 10 |
Fish Oil | 9 |
Salt Block | 9 |
Salt Lick | 7 |
Cashew Shell | 6 |
# ──────────────────────────────────────────────────────────────────────────────
# 0 · Pre-processing helpers ----------------------------------------------------
# Convert D.Amount to numeric and drop strict duplicates that would otherwise
# blow up the joins further down the pipeline.
# ──────────────────────────────────────────────────────────────────────────────
ingredients_clean <- ingredients %>%
mutate(D.Amount = as.numeric(D.Amount)) %>%
distinct(B.Code, A.Level.Name, D.Item, D.Amount,
D.Unit.Amount, D.Unit.Time, D.Unit.Animals,
T.Control, # <- keep it
.keep_all = TRUE)
# Optional sanity check ---------------------------------------------------------
dup_left <- ingredients_clean %>%
count(B.Code, A.Level.Name, D.Item, D.Amount,
D.Unit.Amount, D.Unit.Time, D.Unit.Animals) %>%
filter(n > 1)
if (nrow(dup_left) == 0) {
message("✅ all identical-amount duplicates removed.")
} else {
message("⚠️ still some duplicates—investigate dup_left.")
}
## ⚠️ still some duplicates—investigate dup_left.
# ──────────────────────────────────────────────────────────────────────────────
# quantify_practice_changes() ---------------------------------------------------
# Computes how one diet changes relative to another within each study.
# * Works diet-by-diet, study-by-study (keyed by B.Code)
# * Collapses duplicate ingredient rows *within* each diet before joining
# * Avoids Cartesian explosions without requiring allow.cartesian = TRUE
# ──────────────────────────────────────────────────────────────────────────────
quantify_practice_changes <- function(ingredients_clean, min_pct = 0.5) {
ing <- as.data.table(copy(ingredients_clean)) # never modify caller’s object
#── 0 · basic housekeeping ---------------------------------------------------
ing[, D.Amount := as.numeric(D.Amount)]
ing[, T.Control := tolower(trimws(T.Control))]
# kg → g so we have only one mass basis to compare
ing[D.Unit.Amount == "kg", `:=`(D.Amount = D.Amount * 1000,
D.Unit.Amount = "g")]
# % of diet DM per ingredient (computed diet-by-diet)
ing[, total_g := sum(D.Amount, na.rm = TRUE), by = .(B.Code, A.Level.Name)]
ing[, pct_diet := (D.Amount / total_g) * 100]
#── 1 · work study-by-study --------------------------------------------------
result <- ing[, {
dt <- .SD # only rows for this B.Code (paper)
# Identify control diets BEFORE collapsing, because we need T.Control ----
ctrl <- unique(dt[T.Control == "yes", A.Level.Name])
diets <- unique(dt$A.Level.Name)
# Collapse duplicate ingredient rows *within* each diet -------------------
dt <- dt[, .(
Category = first(Category), # keep the category
pct_diet = sum(pct_diet),
D.Unit.Amount = first(D.Unit.Amount),
D.Unit.Time = first(D.Unit.Time),
D.Unit.Animals = first(D.Unit.Animals)
), by = .(A.Level.Name, D.Item)]
# Build comparison pairs --------------------------------------------------
pairs <- list()
if (length(ctrl)) {
trt <- setdiff(diets, ctrl)
if (length(trt)) {
for (c_diet in ctrl) {
for (t_diet in trt) {
pairs[[length(pairs) + 1]] <- c(c_diet, t_diet)
}
}
}
}
if (length(diets) > 1) {
pairs <- c(pairs, combn(diets, 2, simplify = FALSE))
}
# If no pairs, emit empty DT ---------------------------------------------
if (length(pairs) == 0) {
NULL
} else {
rbindlist(lapply(pairs, function(p) {
diet_A <- p[1]; diet_B <- p[2]
dA <- dt[A.Level.Name == diet_A]
dB <- dt[A.Level.Name == diet_B]
grid <- merge(
dA[, .(D.Item, Category, pct_A = pct_diet,
unit_A = paste(D.Unit.Amount, D.Unit.Time, D.Unit.Animals))],
dB[, .(D.Item, Category, pct_B = pct_diet,
unit_B = paste(D.Unit.Amount, D.Unit.Time, D.Unit.Animals))],
by = c("D.Item", "Category"), all = TRUE, allow.cartesian = FALSE
)
# Unit compatibility check -------------------------------------------
grid[, unit_ok := (is.na(unit_A) | is.na(unit_B) | unit_A == unit_B)]
unit_mismatch <- grid[unit_ok == FALSE, D.Item]
grid <- grid[unit_ok == TRUE]
# Fill NAs, compute deltas, apply threshold ---------------------------
grid[is.na(pct_A), pct_A := 0]
grid[is.na(pct_B), pct_B := 0]
grid[, delta := pct_B - pct_A]
grid <- grid[abs(delta) >= min_pct]
if (nrow(grid) == 0) return(NULL)
# Aggregate change metrics (overall) ----------------------------------
total_added <- grid[pct_A == 0 & delta > 0, sum(delta)]
total_removed <- grid[pct_B == 0 & delta < 0, -sum(delta)]
total_increase <- grid[pct_A > 0 & pct_B > 0 & delta > 0, sum(delta)]
total_decrease <- grid[pct_A > 0 & pct_B > 0 & delta < 0, -sum(delta)]
n_changed <- nrow(grid)
# Aggregate *by D.Type_Nutri* ----------------------------------------------
fmt <- function(dt) {
if (nrow(dt) == 0) return(NA_character_)
paste(paste0(dt$Category, ":", round(dt$V1,2)), collapse = "; ")
}
add_by_type <- fmt(grid[pct_A == 0 & delta > 0, .(V1 = sum(delta)), by = Category])
rem_by_type <- fmt(grid[pct_B == 0 & delta < 0, .(V1 = -sum(delta)), by = Category])
inc_by_type <- fmt(grid[pct_A > 0 & pct_B > 0 & delta > 0, .(V1 = sum(delta)), by = Category])
dec_by_type <- fmt(grid[pct_A > 0 & pct_B > 0 & delta < 0, .(V1 = -sum(delta)), by = Category])
# Practice label ------------------------------------------------------
practice_type <- fifelse(total_added > 0 & total_removed > 0, "substitution",
fifelse(total_added > 0 & total_removed == 0, "addition",
fifelse(total_added == 0 & total_removed > 0, "removal",
"dosage_change")))
compare_to <- if (diet_A %in% ctrl) "control" else paste0("other_trt:", diet_A)
data.table(
diet_A = diet_A,
diet_B = diet_B,
compare_to = compare_to,
practice_type = practice_type,
total_pct_added = round(total_added, 2),
total_pct_removed = round(total_removed, 2),
total_pct_increase = round(total_increase,2),
total_pct_decrease = round(total_decrease,2),
added_by_type = add_by_type,
removed_by_type = rem_by_type,
increase_by_type = inc_by_type,
decrease_by_type = dec_by_type,
n_ingredients_changed= n_changed,
unit_mismatch = paste(unit_mismatch, collapse = "; ")
)
}), fill = TRUE)
}
}, by = .(B.Code)]
# final column order ---------------------------------------------------------
setcolorder(result,
c("B.Code","diet_A","diet_B","compare_to","practice_type",
"total_pct_added","total_pct_removed",
"total_pct_increase","total_pct_decrease",
"added_by_type","removed_by_type",
"increase_by_type","decrease_by_type",
"n_ingredients_changed","unit_mismatch"))
as.data.frame(result)
}
changes <- quantify_practice_changes(ingredients_clean, min_pct = 1) # 1 % threshold
changes<- changes %>%
mutate(pair_id = map2_chr(diet_A, diet_B, ~paste(sort(c(.x, .y)), collapse = "|"))) %>%
distinct(B.Code, pair_id, .keep_all = TRUE) %>% # keep the first orientation
select(-pair_id)
# ── 1 · Helper: explode one “…_by_type” column into tidy rows ─────────────
explode <- function(.data, col, kind) { # { 1
.data %>%
select(B.Code, diet_A, diet_B, all_of(col)) %>%
mutate(pair_id = paste(diet_A, "→", diet_B)) %>% # tag each comparison
mutate(across(all_of(col), ~ ifelse(is.na(.x) | .x == "", NA, .x))) %>%
separate_rows(all_of(col), sep = ";\\s*") %>% # "A:10; B:20" → rows
filter(!is.na(.data[[col]])) %>%
separate(.data[[col]], into = c("Category", "pct"), # "A:10" → A | 10
sep = ":", convert = TRUE, fill = "right") %>%
mutate(
# ─ collapse categories ────────────────────────────────────────────
Category = case_when(
str_detect(Category, regex("supplement", ignore_case = TRUE)) ~ "Supplement",
str_detect(Category, regex("concentrate", ignore_case = TRUE)) ~ "Concentrate",
TRUE ~ Category
),
change_kind = kind
) %>%
filter(Category != "Entire Diet") # drop whole-diet rows
} # } 1
# ── 2 · Build a long table of ingredient-type moves ────────────────────
type_changes <- bind_rows( # ( 2
explode(changes, "added_by_type", "added"),
explode(changes, "removed_by_type", "removed"),
explode(changes, "increase_by_type", "increase"),
explode(changes, "decrease_by_type", "decrease")
) # ) 2
# ── 3 · Count how many comparisons show each Practice × Type combo ────
summary <- type_changes %>%
group_by(Category, change_kind) %>%
summarise(n_comparisons = n_distinct(pair_id), .groups = "drop")
# ── 4 · Plot: bar height = number of comparisons ───────────────────────
ggplot(summary,
aes(Category, n_comparisons, fill = change_kind)) +
geom_col(position = "dodge") +
labs(title = "How often each practice affected each ingredient type",
x = "Ingredient type",
y = "# of diet-pair comparisons",
fill = "Change kind") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top")
changes %>%
mutate(abs_delta = case_when(
practice_type == "addition" ~ total_pct_added,
practice_type == "removal" ~ total_pct_removed,
practice_type == "substitution" ~ pmax(total_pct_added,
total_pct_removed),
practice_type == "dosage_change" ~ pmax(total_pct_increase,
total_pct_decrease)
)) %>%
ggplot(aes(practice_type, abs_delta, fill = practice_type)) +
geom_violin(trim = FALSE, alpha = .4, colour = NA) +
geom_boxplot(width = .15, outlier.shape = NA) +
stat_summary(fun = median, geom = "point", size = 2, colour = "black") +
labs(x = NULL, y = "Magnitude (%-points of diet DM)",
title = "Distribution of practice intensities") +
theme_bw() +
theme(legend.position = "none")
To estimate enteric methane emissions, a methane conversion factor (Ym) is applied to the gross energy (GE) intake of livestock. Ym represents the proportion of the animal’s gross energy intake that is converted to methane and varies based on the species, productivity level, and diet composition. The IPCC (2019) provides updated Ym values based on feed digestibility and forage content. Below is the logic applied:
kable(
data.frame(
Category = c("Dairy Cattle (High)", "Dairy Cattle (Medium)", "Dairy Cattle (Low)", "Dairy Cattle (Unknown)",
"Meat Cattle (>75% forage)", "Meat Cattle (15–75% forage)", "Meat Cattle (<15% forage)", "Meat Cattle (Unknown)",
"Sheep", "Lambs", "Goats"),
Ym_Percent = c(5.7, 6.3, 6.5, 6.0, 7.0, 6.3, 4.0, 6.5, 6.7, 4.5, 5.5),
Condition = c("> 8500 kg milk/year", "5000–8500 kg milk/year", "< 5000 kg milk/year", "Unknown milk yield",
"Forage > 75%", "Forage 15–75%", "Forage < 15%", "Unknown forage %",
"Default for Sheep", "If 'lamb' in Herd.Stage", "Default for Goats")
),
caption = "Assigned Ym Values Based on IPCC 2019 Guidelines and Available Data"
)
Category | Ym_Percent | Condition |
---|---|---|
Dairy Cattle (High) | 5.7 | > 8500 kg milk/year |
Dairy Cattle (Medium) | 6.3 | 5000–8500 kg milk/year |
Dairy Cattle (Low) | 6.5 | < 5000 kg milk/year |
Dairy Cattle (Unknown) | 6.0 | Unknown milk yield |
Meat Cattle (>75% forage) | 7.0 | Forage > 75% |
Meat Cattle (15–75% forage) | 6.3 | Forage 15–75% |
Meat Cattle (<15% forage) | 4.0 | Forage < 15% |
Meat Cattle (Unknown) | 6.5 | Unknown forage % |
Sheep | 6.7 | Default for Sheep |
Lambs | 4.5 | If ‘lamb’ in Herd.Stage |
Goats | 5.5 | Default for Goats |
# Step 1: Identify purpose of each B.Code from the production output table
# Dairy = Milk Yield, Meat = Meat Yield
#harmonize yield data units
yield_data<-merged_metadata$Data.Out%>%
filter(Out.Subind %in% c("Meat Yield","Milk Yield"))
yield_data <- yield_data %>%
mutate(
Out.Unit = tolower(trimws(Out.Unit)), # Normalize casing and whitespace
Out.Unit = case_when(
# Common direct mappings
Out.Unit %in% c("kg", "kg/d", "kg/day", "kg/day/individual", "kg/individual/day",
"kg/d/individual", "kg/individual") ~ "kg/individual/day",
Out.Unit %in% c("g", "g/d", "g/day", "g/day/individual", "g/d/individual",
"g/individual/day", "g/individual") ~ "g/individual/day",
Out.Unit %in% c("mg/d/individual") ~ "mg/individual/day",
Out.Unit %in% c("l/d", "l/day", "l/d/individual", "l/individual/day", "L/individual/day",
"l/individual") ~ "l/individual/day",
Out.Unit %in% c("ml/d/individual", "ml/individual", "ml/individual/day") ~ "ml/individual/day",
# Composite or unclear units
Out.Unit == "g/kg/individual" ~ "g/kg/individual",
Out.Unit == "kg/replicate/experiment" ~ "kg/replicate/experiment",
TRUE ~ Out.Unit # fallback
)
)
yield_data <- yield_data %>%
mutate(
Out.Unit = tolower(trimws(Out.Unit)),
# Convert to standard units
ED.Mean.T = case_when(
Out.Unit == "g/individual/day" ~ ED.Mean.T / 1000, # g → kg
Out.Unit == "mg/individual/day" ~ ED.Mean.T / 1e6, # mg → kg
Out.Unit == "ml/individual/day" ~ ED.Mean.T / 1000, # ml → l
TRUE ~ ED.Mean.T # leave others
),
# Normalize units
Out.Unit = case_when(
Out.Unit %in% c("kg/individual/day", "g/individual/day", "mg/individual/day") ~ "kg/individual/day",
Out.Unit %in% c("l/individual/day", "ml/individual/day") ~ "l/individual/day",
TRUE ~ NA_character_ # mark invalid units
)
) %>%
# Keep only acceptable units
filter(Out.Unit %in% c("kg/individual/day", "l/individual/day"))
milk_yield <- yield_data %>%
filter(Out.Subind == "Milk Yield", Out.Unit == "kg/individual/day") %>%
group_by(B.Code) %>%
summarise(
milk_kg_year = mean(ED.Mean.T, na.rm = TRUE) * 365,
.groups = "drop"
)
# Step 1: Get purpose (Milk vs. Meat) per B.Code, prioritizing Milk if both exist
purpose_by_code <- merged_metadata$Data.Out %>%
filter(Out.Subind %in% c("Milk Yield", "Meat Yield")) %>%
mutate(
Purpose = case_when(
Out.Subind == "Milk Yield" ~ "Dairy",
Out.Subind == "Meat Yield" ~ "Meat",
TRUE ~ NA_character_
)
) %>%
group_by(B.Code) %>%
# Prioritize Dairy if both exist
summarise(Purpose = if ("Dairy" %in% Purpose) "Dairy" else first(Purpose), .groups = "drop")
ingredients_cleaned <- ingredients %>%
mutate(
D.Amount = ifelse(D.Unit.Amount == "g", D.Amount / 1000, D.Amount),
D.Unit.Amount = ifelse(D.Unit.Amount == "g", "kg", D.Unit.Amount)
)
# Step 3: Keep only diets where all units are now consistent (all in kg)
diet_valid_units <- ingredients_cleaned %>%
group_by(B.Code, A.Level.Name) %>%
filter(n_distinct(D.Unit.Amount, na.rm = TRUE) == 1) %>%
ungroup()
# Step 2: Tag forage types (you can expand this list as needed)
forage_types <- c("Forage Trees", "Crop Byproduct",
"Herbaceous Fodders", "Agroforestry Fodders")
forage_proportions <- diet_valid_units %>%
filter(D.Type != "Entire Diet") %>%
group_by(B.Code, A.Level.Name) %>%
summarise(
any_na = any(is.na(D.Amount)), # Check for NA in D.Amount
total_diet = ifelse(any_na, NA_real_, sum(D.Amount, na.rm = TRUE)),
total_forage = ifelse(any_na, NA_real_, sum(D.Amount[D.Type %in% forage_types], na.rm = TRUE)),
n_ingredients = n(),
is_forage = all(D.Type[1] %in% forage_types),
D.Type.first = D.Type[1],
D.Unit.Amount = first(D.Unit.Amount),
forage_prop = case_when(
n_ingredients == 1 & D.Type.first %in% forage_types ~ 1,
n_ingredients == 1 & !(D.Type.first %in% forage_types) ~ 0,
total_diet > 0 ~ total_forage / total_diet,
TRUE ~ NA_real_
),
.groups = "drop"
)
#
# ingredients <- ingredients %>% # your table
# # 1 ── throw away the old “source‑based” label -----------------------------
# select(-D.Type) %>% # drop the column
#
# # 2 ── remove any diet that still has an NA in D.Type_Nutri ----------------
# group_by(B.Code, T.Name) %>% # <<– keys that identify a single diet
# filter( all(!is.na(D.Type_Nutri)) ) %>% # keep diet only if none are NA
# ungroup()
Reconstructs the gross energy of the entire diet from the weighted average of GE values of individual ingredients, when diet-level GE is not reported.
# --- Calculate GE_diet (MJ/kg) from ingredient GE and amount ---
# 1. Join GE values to ingredient amounts
# --- Calculate GE_diet (MJ/kg) ONLY if all ingredients have GE ---
# 1. Join GE values to ingredient amounts
ingredients_with_GE <- ingredients %>%
filter(D.Type!="Entire Diet")%>%
mutate(Diet_ID = paste(B.Code, A.Level.Name, sep = "__")) %>%
left_join(
Gross_energy %>%
filter(!is_entire_diet) %>%
select(B.Code, D.Item, DC.Value),
by = c("B.Code", "D.Item")
) %>%
mutate(
D.Amount_kg = case_when(
D.Unit.Amount == "g" ~ D.Amount / 1000,
D.Unit.Amount == "kg" ~ D.Amount,
TRUE ~ NA_real_
)
)
# 2. Count ingredients with/without GE
ingredient_counts <- ingredients_with_GE %>%
group_by(Diet_ID) %>%
summarise(
n_total = n(),
n_with_GE = sum(!is.na(DC.Value)),
.groups = "drop"
) %>%
filter(n_total == n_with_GE)
# 3. Reconstruct GE_diet (MJ/kg)
GE_from_ingredients <- ingredients_with_GE %>%
semi_join(ingredient_counts, by = "Diet_ID") %>%
group_by(Diet_ID, B.Code, A.Level.Name) %>%
summarise(
GE_diet = sum(DC.Value * D.Amount_kg) / sum(D.Amount_kg),
.groups = "drop"
)
# 4. Use GE_diet when full diet-level GE is missing
# Join forage proportions into GE_diet_reconstructed
GE_diet_reconstructed <- GE_from_ingredients %>%
left_join(total_amounts %>%
select(B.Code, A.Level.Name, Total_Amount, D.Unit.Amount),
by = c("B.Code", "A.Level.Name")) %>%
left_join(merged_metadata$Prod.Out %>% select(B.Code, P.Product, Herd.Stage), by = "B.Code") %>%
left_join(purpose_by_code, by = "B.Code") %>%
left_join(forage_proportions %>% select(B.Code, A.Level.Name, forage_prop),
by = c("B.Code", "A.Level.Name")) %>%
left_join(milk_yield, by = "B.Code") %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
mutate(
Total_Amount_kg = case_when(
D.Unit.Amount == "g" ~ Total_Amount / 1000,
D.Unit.Amount == "kg" ~ Total_Amount,
TRUE ~ NA_real_
),
D.Unit.Amount = "kg", # <- FORCE ALL TO BE KG
Ym = case_when(
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year > 8500 ~ 0.057,
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year >= 5000 ~ 0.063,
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year < 5000 ~ 0.065,
P.Product == "Cattle" & Purpose == "Dairy" & is.na(milk_kg_year) ~ 0.06,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop > 0.75 ~ 0.07,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop > 0.15 ~ 0.063,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop >= 0 ~ 0.04,
P.Product == "Cattle" & Purpose == "Meat" & is.na(forage_prop) ~ 0.065,
P.Product == "Sheep" & grepl("lamb", Herd.Stage, ignore.case = TRUE) ~ 0.045,
P.Product == "Sheep" ~ 0.067,
P.Product == "Goat" ~ 0.055,
TRUE ~ NA_real_
),
GE_daily = ifelse(!is.na(GE_diet) & !is.na(Total_Amount_kg),
GE_diet * Total_Amount_kg,
NA_real_),
CH4_kg_year = ifelse(!is.na(GE_daily) & !is.na(Ym),
GE_daily * Ym * 365 / 55.65,
NA_real_),
CH4_flag = case_when(
is.na(GE_diet) ~ "missing_GE_diet",
is.na(Total_Amount_kg) ~ "missing_amount",
is.na(Ym) ~ "missing_Ym",
is.na(GE_daily) ~ "GE_not_calculated",
is.na(CH4_kg_year) ~ "CH4_not_calculated",
TRUE ~ "GE_diet_based"
),
source = "GE_diet_from_ingredients"
) %>%
rename(D.Item = A.Level.Name) %>%
select(B.Code, D.Item, P.Product, Herd.Stage, GE_daily, CH4_kg_year,
Total_Amount = Total_Amount_kg, D.Unit.Amount, ED.Mean.T = Total_Amount_kg,
CH4_flag, Ym, source)
Gross_energy_entire <- Gross_energy %>%
filter(is_entire_diet == TRUE) %>%
# Add feed amount and unit
left_join(
total_amounts %>% select(B.Code, A.Level.Name, Total_Amount, D.Unit.Amount),
by = c("B.Code" = "B.Code", "D.Item" = "A.Level.Name")
) %>%
# Add product info and herd stage
left_join(
merged_metadata$Prod.Out %>% select(B.Code, P.Product, Herd.Stage),
by = "B.Code"
) %>%
# Add dairy vs meat purpose
left_join(purpose_by_code, by = "B.Code") %>%
mutate(
# Convert Total_Amount to kg/day
Total_Amount = case_when(
D.Unit.Amount == "g" ~ Total_Amount / 1000,
D.Unit.Amount == "kg" ~ Total_Amount,
TRUE ~ NA_real_
),
# Fix unit
D.Unit.Amount = case_when(
D.Unit.Amount == "g" ~ "kg",
TRUE ~ D.Unit.Amount
),
# Diagnostic reason for failure
reason_missing = case_when(
is.na(Total_Amount) & D.Unit.Amount %in% c("g/kg", "%", "% Diet") ~ "Cannot convert unit to kg",
TRUE ~ NA_character_
),
# Gross energy intake
GE_daily = ifelse(
!is.na(Total_Amount) & !is.na(DC.Value),
DC.Value * Total_Amount,
NA_real_
)
)
# Make sure 'purpose_by_code' is unique per B.Code
purpose_by_code_unique <- purpose_by_code %>%
distinct(B.Code, Purpose)
# Then join safely before using 'Purpose'
GE_all <- Gross_energy_entire %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
left_join(forage_proportions %>% select(B.Code, A.Level.Name, forage_prop),
by = c("B.Code", "D.Item" = "A.Level.Name")) %>%
left_join(milk_yield, by = "B.Code") %>%
left_join(
feed_intake_diet %>%
select(B.Code, A.Level.Name, ED.Mean.T),
by = c("B.Code", "D.Item" = "A.Level.Name")
) %>%
mutate(
Total_Amount = if_else(!is.na(ED.Mean.T), ED.Mean.T, Total_Amount),
D.Unit.Amount = if_else(!is.na(ED.Mean.T), "kg", D.Unit.Amount),
Ym = case_when(
# --- Dairy cattle based on productivity ---
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year > 8500 ~ 0.057,
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year >= 5000 ~ 0.063,
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year < 5000 ~ 0.065,
P.Product == "Cattle" & Purpose == "Dairy" & is.na(milk_kg_year) ~ 0.06,
# --- Meat cattle based on forage proportions ---
P.Product == "Cattle" & Purpose == "Meat" & forage_prop > 0.75 ~ 0.07,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop > 0.15 ~ 0.063,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop >= 0 ~ 0.04,
P.Product == "Cattle" & Purpose == "Meat" & is.na(forage_prop) ~ 0.065,
# --- Sheep & Lambs ---
P.Product == "Sheep" & grepl("lamb", Herd.Stage, ignore.case = TRUE) ~ 0.045,
P.Product == "Sheep" ~ 0.067,
# --- Goats ---
P.Product == "Goat" ~ 0.055,
TRUE ~ NA_real_
),
GE_daily = ifelse(!is.na(DC.Value) & !is.na(Total_Amount),
DC.Value * Total_Amount,
NA_real_),
CH4_kg_year = ifelse(!is.na(GE_daily) & !is.na(Ym),
GE_daily * Ym * 365 / 55.65,
NA_real_),
CH4_flag = case_when(
is.na(DC.Value) ~ "missing_GE_value",
is.na(Ym) ~ "missing_Ym",
is.na(Total_Amount) ~ "missing_amount",
is.na(GE_daily) ~ "GE_not_calculated",
is.na(CH4_kg_year) ~ "CH4_not_calculated",
!is.na(ED.Mean.T) ~ "amount_inferred from feed intake",
TRUE ~ "ok"
)
)
###############################################################################
## SINGLE CHUNK: Sum Ingredient Intake & Merge into Entire-Diet GE + CH₄ Calc ##
###############################################################################
# 1) Sum ingredient-level feed intake -----------------------------------------
ingredient_intake_summed <- feed_intake_ingredients %>%
group_by(B.Code, A.Level.Name) %>%
summarise(
sum_ingredient_intake = sum(ED.Mean.T, na.rm = TRUE),
.groups = "drop"
)
# 2) Merge this ingredient sum into the entire-diet GE table ------------------
# Then calculate final intake, daily GE, and CH₄.
GE_all_reconstructed <- Gross_energy_entire %>%
left_join(forage_proportions %>% select(B.Code, A.Level.Name, forage_prop),
by = c("B.Code", "D.Item" = "A.Level.Name")) %>%
left_join(milk_yield, by = "B.Code") %>%
# Add species info and keep only ruminants
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
# Merge the ingredient-level summed intake (aliasing A.Level.Name to D.Item)
left_join(
ingredient_intake_summed,
by = c("B.Code", "D.Item" = "A.Level.Name")
) %>%
# Compute CH₄-related fields
mutate(
# Methane conversion factor by species
Ym = case_when(
# --- Dairy cattle based on productivity ---
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year > 8500 ~ 0.057,
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year >= 5000 ~ 0.063,
P.Product == "Cattle" & Purpose == "Dairy" & milk_kg_year < 5000 ~ 0.065,
P.Product == "Cattle" & Purpose == "Dairy" & is.na(milk_kg_year) ~ 0.06,
# --- Meat cattle based on forage proportions ---
P.Product == "Cattle" & Purpose == "Meat" & forage_prop > 0.75 ~ 0.07,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop > 0.15 ~ 0.063,
P.Product == "Cattle" & Purpose == "Meat" & forage_prop >= 0 ~ 0.04,
P.Product == "Cattle" & Purpose == "Meat" & is.na(forage_prop) ~ 0.065,
# --- Sheep & Lambs ---
P.Product == "Sheep" & grepl("lamb", Herd.Stage, ignore.case = TRUE) ~ 0.045,
P.Product == "Sheep" ~ 0.067,
# --- Goats ---
P.Product == "Goat" ~ 0.055,
TRUE ~ NA_real_
),
# Final daily intake (kg/day): pick the best available, but avoid using DC.Value
final_intake_kg_day = case_when(
!is.na(Total_Amount) & D.Unit.Amount == "kg" ~ Total_Amount,
!is.na(sum_ingredient_intake) ~ sum_ingredient_intake,
TRUE ~ NA_real_
),
# Track source of intake
final_intake_source = case_when(
!is.na(Total_Amount) & D.Unit.Amount == "kg" ~ "Total_Amount",
!is.na(sum_ingredient_intake) ~ "Total feed intake",
TRUE ~ "Missing"
),
# Daily GE intake (MJ/day) = GE content × daily intake
GE_daily = case_when(
!is.na(final_intake_kg_day) & !is.na(DC.Value) ~ DC.Value * final_intake_kg_day,
TRUE ~ NA_real_
),
# Annual CH₄ emissions
CH4_kg_year = ifelse(
!is.na(GE_daily) & !is.na(Ym),
GE_daily * Ym * 365 / 55.65,
NA_real_
),
# Flag potential issues
CH4_flag = case_when(
is.na(DC.Value) ~ "missing_GE_value",
is.na(Ym) ~ "missing_Ym",
is.na(final_intake_kg_day) ~ "missing_final_intake",
is.na(GE_daily) ~ "GE_not_calculated",
is.na(CH4_kg_year) ~ "CH4_not_calculated",
TRUE ~ "ok"
)
)
#combine
# 1. Tag and select relevant columns from both tables
GE_all_clean <- GE_all %>%
mutate(source = "GE_all") %>%
select(
B.Code,
D.Item,
P.Product,
Herd.Stage,
GE_daily,
CH4_kg_year,
Total_Amount,
D.Unit.Amount,
ED.Mean.T,
CH4_flag,
Ym,
source
)
GE_reconstructed_clean <- GE_all_reconstructed %>%
mutate(source = "Reconstructed") %>%
rename(ED.Mean.T = final_intake_kg_day) %>%
select(
B.Code,
D.Item,
P.Product,
Herd.Stage,
GE_daily,
CH4_kg_year,
Total_Amount,
D.Unit.Amount,
ED.Mean.T,
CH4_flag,
Ym,
source
)
GE_combined <- bind_rows(GE_all_clean, GE_reconstructed_clean, GE_diet_reconstructed) %>%
mutate(
source_priority = case_when(
source == "GE_all" ~ 1,
source == "Reconstructed" ~ 2,
source == "GE_diet_from_ingredients" ~ 3,
TRUE ~ 4
),
has_CH4 = !is.na(CH4_kg_year)
) %>%
arrange(B.Code, D.Item, desc(has_CH4), source_priority) %>%
distinct(B.Code, D.Item, .keep_all = TRUE) %>%
select(-has_CH4, -source_priority)
# Define non-emitting or pre-ruminant stages
non_emitting_stages <- c(
"Kid", "Kid Male", "Calf Female", "Weaned Calf Male",
"Young Male", "Young Female",
"Intact Kid Male"
)
# Filter them out
GE_combined <- GE_combined %>%
filter(!(Herd.Stage %in% non_emitting_stages))
GE_combined <- GE_combined %>%
left_join(ctrl_key, by = c("B.Code", "D.Item" = "A.Level.Name"))
GE_data <- GE_combined %>%
filter(!is.na(GE_daily) & GE_daily != "") %>%
distinct(B.Code)
# 3. Papers with GE and species (intersection)
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 43 |
Goat | 186 | 68 |
Sheep | 222 | 90 |
# Count unique B.Code with non-missing CH4 emissions
n_CH4_codes <- GE_combined %>%
filter(!is.na(CH4_kg_year)) %>%
distinct(B.Code) %>%
nrow()
n_CH4_codes
## [1] 186
n_CH4_codes_by_product <- GE_combined %>%
filter(!is.na(CH4_kg_year)) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "n_CH4_codes")
n_CH4_codes_by_product
## P.Product n_CH4_codes
## <char> <int>
## 1: Cattle 28
## 2: Goat 68
## 3: Sheep 90
# Step 1: Get list of B.Codes from GE_combined
relevant_BCodes <- GE_combined %>%
distinct(B.Code)
# Step 2: Filter Animals.Diet.Comp for relevant B.Codes
diet_data_filtered <- merged_metadata$Animals.Diet %>%
semi_join(relevant_BCodes, by = "B.Code")
# Step 3: Count unique diets and ingredients
total_diets <- diet_data_filtered %>%
filter(!is.na(A.Level.Name)) %>%
distinct(B.Code, A.Level.Name) %>%
nrow()
total_ingredients <- diet_data_filtered %>%
filter(!is.na(D.Item)) %>%
distinct(B.Code, D.Item) %>%
nrow()
# Step 4: Output
cat("Total unique diets:", total_diets, "\n")
## Total unique diets: 1868
cat("Total unique ingredients:", total_ingredients, "\n")
## Total unique ingredients: 2686
total number of codes that have ge and have both treatment and control
# Identify B.Codes that contain both control and treatment diets
b_codes_with_both_ctrl_and_trt <- GE_combined %>%
filter(!is.na(CH4_kg_year)) %>%
filter(T.Control %in% c("yes", "no")) %>% # Only if T.Control is properly labeled
group_by(B.Code) %>%
summarise(n_control_status = n_distinct(T.Control), .groups = "drop") %>%
filter(n_control_status > 1) # Keep only those with both Yes and No
# Count of such B.Codes
n_both_ctrl_and_trt <- nrow(b_codes_with_both_ctrl_and_trt)
cat("Number of B.Codes with both control and treatment diets:", n_both_ctrl_and_trt, "\n")
## Number of B.Codes with both control and treatment diets: 89
papers that have no tagg for t control
check_control<-ingredients%>%
filter(is.na(T.Control))%>%
select(B.Code,A.Level.Name,D.Item,T.Control)
filter ge combined with only papers with both control and treatments
GE_combined <- GE_combined %>%
semi_join(b_codes_with_both_ctrl_and_trt, by = "B.Code")
GE_species <- merged_metadata$`Prod.Out` %>%
filter(P.Product %in% c("Cattle", "Sheep", "Goat")) %>%
semi_join(GE_data, by = "B.Code") %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "Studies_with_GE")
summary_table <- species_counts %>%
full_join(GE_species, by = "P.Product") %>%
mutate(across(everything(), ~replace_na(., 0)))
kable(summary_table, caption = "Summary of Data Availability for Enteric CH₄ Emissions (Tier 2)")
P.Product | Total_Studies | Studies_with_GE |
---|---|---|
Cattle | 127 | 43 |
Goat | 186 | 68 |
Sheep | 222 | 90 |
# Count unique B.Code with non-missing CH4 emissions
n_CH4_codes <- GE_combined %>%
filter(!is.na(CH4_kg_year)) %>%
distinct(B.Code) %>%
nrow()
n_CH4_codes
## [1] 89
n_CH4_codes_by_product <- GE_combined %>%
filter(!is.na(CH4_kg_year)) %>%
distinct(B.Code, P.Product) %>%
count(P.Product, name = "n_CH4_codes")
n_CH4_codes_by_product
## P.Product n_CH4_codes
## <char> <int>
## 1: Cattle 17
## 2: Goat 37
## 3: Sheep 35
relevant_BCodes <- GE_combined %>%
distinct(B.Code)
# Step 2: Filter Animals.Diet.Comp for relevant B.Codes
diet_data_filtered <- merged_metadata$Animals.Diet %>%
semi_join(relevant_BCodes, by = "B.Code")
# Step 3: Count unique diets and ingredients
total_diets <- diet_data_filtered %>%
filter(!is.na(A.Level.Name)) %>%
distinct(B.Code, A.Level.Name) %>%
nrow()
total_ingredients <- diet_data_filtered %>%
filter(!is.na(D.Item)) %>%
distinct(B.Code, D.Item) %>%
nrow()
# Step 4: Output
cat("Total unique diets:", total_diets, "\n")
## Total unique diets: 448
cat("Total unique ingredients:", total_ingredients, "\n")
## Total unique ingredients: 674
here we will have the same issue as with feed intake data since the Alevel in the data tab dont always latch the ones in the diet tab
# Step 1: Make sure the key columns match (D.Item and A.Level.Name)
GE_combined_for_merge <- GE_combined %>%
rename(A.Level.Name = D.Item)%>%
select(-"ED.Mean.T")
# Step 2: Merge yield and emission data
yield_with_emissions <- yield_data %>%
left_join(
GE_combined_for_merge,
by = c("B.Code", "A.Level.Name")
)
# distinct B.Code per product
bcode_per_product <- yield_with_emissions %>% # or yield_data
distinct(P.Product, B.Code) %>% # keep one row per pair
count(P.Product, name = "n_BCodes") # tally them
bcode_per_product
## P.Product n_BCodes
## <char> <int>
## 1: Cattle 17
## 2: Goat 13
## 3: Sheep 11
## 4: <NA> 309
# Filter meat yield data and drop missing values
yield_meat <- yield_with_emissions %>%
filter(Out.Subind == "Meat Yield") %>%
filter(!is.na(ED.Mean.T) & !is.na(CH4_kg_year) & !is.na(P.Product))
# Get axis limits
x_range <- range(yield_meat$ED.Mean.T, na.rm = TRUE)
y_range <- range(yield_meat$CH4_kg_year, na.rm = TRUE)
# Plot
ggplot(yield_meat, aes(x = ED.Mean.T, y = CH4_kg_year, color = P.Product)) +
geom_point(alpha = 0.8, size = 2.5) +
scale_x_log10(
limits = x_range,
breaks = scales::log_breaks(n = 5),
labels = scales::comma_format()
) +
scale_y_continuous(
limits = y_range,
labels = scales::comma_format()
) +
labs(
title = "CH4 Emissions vs. Meat Yield",
x = "Meat Yield per Individual (kg)",
y = "Annual CH4 Emissions (kg)",
color = "Livestock Species"
) +
theme_minimal() +
theme(legend.position = "bottom")
# Filter meat yield data and drop missing values
yield_milk <- yield_with_emissions %>%
filter(Out.Subind == "Milk Yield") %>%
filter(!is.na(ED.Mean.T) & !is.na(CH4_kg_year) & !is.na(P.Product))
# Get axis limits
x_range <- range(yield_milk$ED.Mean.T, na.rm = TRUE)
y_range <- range(yield_milk$CH4_kg_year, na.rm = TRUE)
# Plot
ggplot(yield_milk, aes(x = ED.Mean.T, y = CH4_kg_year, color = P.Product)) +
geom_point(alpha = 0.8, size = 2.5) +
scale_x_log10(
limits = x_range,
breaks = scales::log_breaks(n = 5),
labels = scales::comma_format()
) +
scale_y_continuous(
limits = y_range,
labels = scales::comma_format()
) +
labs(
title = "CH4 Emissions vs. Meat Yield",
x = "Milk Yield per Individual (kg)",
y = "Annual CH4 Emissions (kg)",
color = "Livestock Species"
) +
theme_minimal() +
theme(legend.position = "bottom")
# Filter and clean data
emissions_by_species <- yield_with_emissions %>%
filter(!is.na(CH4_kg_year), !is.na(P.Product))
# Boxplot with transparent points
ggplot(emissions_by_species, aes(x = P.Product, y = CH4_kg_year, fill = P.Product)) +
geom_boxplot(outlier.shape = NA, alpha = 0.4) + # lighter fill and no outliers
geom_jitter(width = 0.2, alpha = 0.5, color = "black", size = 2) + # transparent points
labs(
title = "Annual CH4 Emissions by Livestock Species",
x = "Species",
y = "Annual CH4 Emissions (kg)"
) +
theme_minimal() +
theme(legend.position = "none") # remove redundant legend
# Filter and clean data for Goat and Sheep only
emissions_by_species <- yield_with_emissions %>%
filter(!is.na(CH4_kg_year), P.Product %in% c("Goat", "Sheep"))
# Boxplot with transparent points for Goat and Sheep
ggplot(emissions_by_species, aes(x = P.Product, y = CH4_kg_year, fill = P.Product)) +
geom_boxplot(outlier.shape = NA, alpha = 0.4) +
geom_jitter(width = 0.2, alpha = 0.5, color = "black", size = 2) +
labs(
title = "Annual CH4 Emissions: Goat vs. Sheep",
x = "Species",
y = "Annual CH4 Emissions (kg)"
) +
theme_minimal() +
theme(legend.position = "none")
## 1) Live‑weight table --------------------------------------------------------
bw_tbl <- weights_unique %>%
select(B.Code, LiveWeight_kg = Out.WG.Start) %>%
filter(!is.na(LiveWeight_kg))
## 2) Merge with CH₄ and keep P.Product already present -----------------------
ch4_vs_bw <- GE_combined %>% # contains P.Product
filter(!is.na(CH4_kg_year)) %>%
left_join(bw_tbl, by = "B.Code") %>%
filter(!is.na(LiveWeight_kg) & LiveWeight_kg > 0)
## Warning in left_join(., bw_tbl, by = "B.Code"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 5 of `x` matches multiple rows in `y`.
## ℹ Row 3 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## 3) Plot ---------------------------------------------------------------------
##########################################################
## CH4 vs. live‑weight with 95 % confidence intervals ##
##########################################################
ggplot(ch4_vs_bw,
aes(x = LiveWeight_kg, y = CH4_kg_year, colour = P.Product)) +
geom_point(alpha = 0.65, size = 2.3) +
geom_smooth(method = "lm",
formula = y ~ log10(x), # linear in log‑weight
se = TRUE, # << confidence ribbon
linetype = "dashed",
linewidth = 1) +
scale_x_log10(breaks = scales::log_breaks(), labels = scales::comma) +
labs(
title = "Annual CH4 emissions vs. live weight",
x = "Body weight (kg, log scale)",
y = "CH₄ (kg animal⁻¹ year⁻¹)",
colour = "Species"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
wg<-merged_metadata$Data.Out%>%filter(Out.Subind=="Weight Gain")
###############################################################################
## STANDARDISE WEIGHT-GAIN TO kg animal⁻¹ day⁻¹ (no totals, no wk/mo) ##
###############################################################################
wg_clean <- wg %>%
## 1. normalise text ---------------------------------------------------------
mutate(
unit_raw = tolower(trimws(Out.Unit)),
value = as.numeric(ED.Mean.T) # be sure it's numeric
) %>%
## 2. discard relative indices (% IBW, % SGR) -------------------------------
filter(
!str_detect(unit_raw, "% ibw") &
!str_detect(unit_raw, "% sgr")
) %>%
## 3. keep only entries that already contain “/d” or “/day” ------------------
filter(str_detect(unit_raw, "/d") | str_detect(unit_raw, "/day") |
str_detect(unit_raw, "g/kg metabolic weight")) %>%
## 4. parse amount unit ------------------------------------------------------
mutate(
amount_unit = case_when(
str_starts(unit_raw, "mg") ~ "mg",
str_starts(unit_raw, "g") ~ "g",
str_starts(unit_raw, "kg") ~ "kg",
TRUE ~ NA_character_
),
gain_kg = case_when(
amount_unit == "mg" ~ value / 1e6,
amount_unit == "g" ~ value / 1e3,
amount_unit == "kg" ~ value,
TRUE ~ NA_real_
)
) %>%
## 5. handle “g/kg metabolic weight” -----------------------------------------
left_join(
weights_unique %>%
select(B.Code, A.Level.Name, BW_start_kg = Out.WG.Start),
by = c("B.Code", "A.Level.Name")
) %>%
mutate(
ADG_kg = case_when(
str_detect(unit_raw, "g/kg metabolic weight") &
!is.na(value) & !is.na(BW_start_kg)
~ (value / 1000) * (BW_start_kg ^ 0.75), # g → kg × BW^0.75
TRUE ~ gain_kg # already kg/d
)
) %>%
## 6. final screen -----------------------------------------------------------
filter(!is.na(ADG_kg) & ADG_kg > 0) %>%
select(B.Code, A.Level.Name, ADG_kg)
############################################################
## EMISSION INTENSITY (kg CH₄ kg-¹ live-weight gain) ##
############################################################
## 1. merge CH₄ and ADG ------------------------------------
ei_df <- GE_combined %>% # CH₄ table
select(B.Code, A.Level.Name = D.Item,
CH4_kg_year, P.Product) %>%
inner_join(
wg_clean, # weight-gain table
by = c("B.Code", "A.Level.Name")
) %>%
## 2. compute emission intensity -------------------------
mutate(
gain_kg_year = ADG_kg * 365, # kg yr-¹
EI_CH4_kg = CH4_kg_year / gain_kg_year
) %>%
## 3. basic QC ------------------------------------------
filter(
!is.na(EI_CH4_kg) & EI_CH4_kg > 0, # remove missing / negative
EI_CH4_kg < 1 # drop extreme outliers (>1 kg CH₄ kg-¹ LW-gain)
)
length(unique(ei_df$B.Code))
## [1] 25
# A) box-and-jitter by species -----------------------------------------------
ggplot(ei_df, aes(x = P.Product, y = EI_CH4_kg, fill = P.Product)) +
geom_boxplot(outlier.shape = NA, alpha = 0.35) +
geom_jitter(width = 0.15, alpha = 0.65, size = 2) +
scale_y_continuous("Emission intensity (kg CH₄ kg⁻¹ LW-gain)",
labels = scales::comma_format()) +
labs(title = "Enteric-methane intensity of growth",
x = NULL) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# B) relationship between growth rate and EI ---------------------------------
ggplot(ei_df, aes(x = ADG_kg, y = EI_CH4_kg, colour = P.Product)) +
geom_point(alpha = 0.7, size = 2.3) +
geom_smooth(method = "lm", se = TRUE, linetype = "dashed") +
scale_x_log10("Average daily gain (kg animal⁻¹ day⁻¹)",
breaks = scales::log_breaks(),
labels = scales::comma_format()) +
scale_y_continuous("Emission intensity (kg CH₄ kg⁻¹ LW-gain)",
labels = scales::comma_format()) +
labs(title = "Faster-growing animals generally emit less CH₄ per kg gain",
colour = "Species") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
library(dplyr); library(ggplot2); library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
# 0. Make sure your weight-gain table is available
# (this is the object you built in the earlier step)
adg_clean <- wg_clean %>% # <- rename for clarity
select(B.Code, A.Level.Name, ADG_kg) # keep only the essentials
# 1. Bring EI together with the absolute-CH₄ table you already built
ei_tbl <- adg_clean %>% # has A.Level.Name
inner_join(
GE_combined,
by = c("B.Code" = "B.Code", # same name
"A.Level.Name" = "D.Item") # different names
) %>%
mutate(
EI = CH4_kg_year / (ADG_kg * 365) # kg CH₄ kg⁻¹ LW-gain
)
# 2. Median split lines (or choose any threshold you like)
vline <- median(ei_tbl$CH4_kg_year, na.rm = TRUE)
hline <- median(ei_tbl$EI, na.rm = TRUE)
x_lim <- range(ei_tbl$ADG_kg, na.rm = TRUE) # e.g. 0.002 – 0.8
y_lim <- range(ei_tbl$EI, na.rm = TRUE) # e.g. 0.005 – 1.2
ch4_lim <- range(ei_tbl$CH4_kg_year, na.rm = TRUE)
# 3. Plot ----------------------------------------------------------------
ggplot(ei_tbl,
aes(x = ADG_kg,
y = EI,
colour = CH4_kg_year,
size = CH4_kg_year)) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, colour = "black") +
scale_x_log10(limits = x_lim,
breaks = log_breaks(),
labels = comma_format()) +
scale_y_log10(limits = y_lim,
breaks = c(.005, .01, .03, .1, .3, 1),
labels = comma_format()) +
scale_colour_viridis_c(option = "plasma",
trans = "log10",
limits = ch4_lim,
name = "Annual CH₄ (kg)") +
scale_size(range = c(2, 6),
trans = "log10",
limits = ch4_lim,
guide = "none") +
facet_wrap(~ P.Product) + # same axes in every facet
labs(title = "Faster-growing animals emit less CH4 per kg gain",
subtitle = "Axes are identical across cattle, goat and sheep panels",
x = "Average daily gain (kg animal⁻¹ day⁻¹, log scale)",
y = "Emission intensity (kg CH4 kg⁻¹ live-weight gain)",
caption = "Point colour & size scale with absolute annual CH4 per animal") +
theme_minimal(base_size = 12) +
theme(legend.position = "right")
## `geom_smooth()` using formula = 'y ~ x'
##############################################
## 1. PREPARE DAILY YIELD (kg animal-1 d-1)
##############################################
## density of milk for L → kg conversion
MILK_DENSITY <- 1.03 # kg L-1
yield_d_clean <- yield_data %>% # ← your harmonised table
filter(Out.Subind %in% c("Milk Yield", "Meat Yield")) %>%
mutate(
Yield_kg_day = case_when(
# ───────────── Milk ─────────────────────────────────────────────
Out.Subind == "Milk Yield" & Out.Unit == "l/individual/day" ~ ED.Mean.T * MILK_DENSITY,
Out.Subind == "Milk Yield" & Out.Unit == "kg/individual/day" ~ ED.Mean.T,
# ───────────── Meat (already kg day-1) ─────────────────────────
Out.Subind == "Meat Yield" & Out.Unit == "kg/individual/day" ~ ED.Mean.T,
# ───────────── Everything else (drop) ─────────────────────────
TRUE ~ NA_real_
)
) %>%
filter(!is.na(Yield_kg_day) & Yield_kg_day > 0) %>% # keep usable rows only
select(B.Code, A.Level.Name, Out.Subind, Yield_kg_day)
###########################################################################
## 2. COMPUTE EMISSION-INTENSITY (kg CH4 per kg *product*) #############
###########################################################################
ei_prod <- GE_combined %>% # absolute CH4 table
select(B.Code, A.Level.Name = D.Item,
CH4_kg_year, P.Product,T.Control) %>%
inner_join(yield_d_clean, by = c("B.Code","A.Level.Name")) %>%
mutate(
Yield_kg_year = Yield_kg_day * 365,
EI_CH4_prod = CH4_kg_year / Yield_kg_year,
Product = if_else(Out.Subind == "Milk Yield", "Milk", "Meat")
) %>%
filter(EI_CH4_prod > 0, EI_CH4_prod < 10) # drop negatives / crazy outliers
#########################################
## 3. QUICK DIAGNOSTIC PLOTS ##########
#########################################
# ── A) Box-and-jitter by species & product ───────────────────────────────
ggplot(ei_prod, aes(x = P.Product, y = EI_CH4_prod,
fill = P.Product)) +
geom_boxplot(outlier.shape = NA, alpha = .35) +
geom_jitter(width = .15, alpha = .65, size = 2) +
scale_y_log10("Emission-intensity (kg CH₄ kg⁻¹ product)",
breaks = scales::log_breaks(),
labels = scales::comma_format()) +
facet_wrap(~ Product, nrow = 1) +
labs(title = "Enteric-methane intensity of milk and meat production",
x = NULL) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# ── B) Relationship between yield and EI ────────────────────────────────
ggplot(ei_prod,
aes(x = Yield_kg_day,
y = EI_CH4_prod,
colour = P.Product)) +
geom_point(alpha = .7, size = 2.3) +
geom_smooth(method = "lm", se = TRUE, linetype = "dashed") +
scale_x_log10("Yield (kg animal⁻¹ day⁻¹, log scale)",
breaks = scales::log_breaks(),
labels = scales::comma_format()) +
scale_y_log10("Emission-intensity (kg CH₄ kg⁻¹ product)",
breaks = scales::log_breaks(),
labels = scales::comma_format()) +
facet_wrap(~ Product, nrow = 1) +
labs(title = "Higher-yielding animals emit less CH₄ per kg of product",
colour = "Species") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
length(unique(ei_prod$B.Code))
## [1] 41
################################################################################
## Join “changes” with emission-intensity of BOTH diets in every comparison ##
###############################################################################
## keep only the columns we need from ei_prod
ei_core <- ei_prod %>%
select(B.Code,
A.Level.Name, # diet name
EI_CH4_prod, # kg CH4 · kg-product⁻¹
CH4_kg_year, # (optional, if you want absolute CH4 later)
Product, P.Product, # meat / milk & species
T.Control, # yes / no
Yield_kg_day, Yield_kg_year) # keep yields if you like
ei_pairs <- changes %>%
## ---- attach intensity for diet_A -----------------------------------------
left_join(ei_core,
by = c("B.Code", "diet_A" = "A.Level.Name"),
suffix = c("", "_A")) %>% # tmp suffix to avoid clashes
rename(EI_A = EI_CH4_prod) %>% # keep only the field we need
## ---- attach intensity for diet_B -----------------------------------------
left_join(ei_core,
by = c("B.Code", "diet_B" = "A.Level.Name"),
suffix = c("_A", "_B")) %>% # second join, new suffix
rename(EI_B = EI_CH4_prod) %>%
## ---- derive deltas & practice magnitude -----------------------------------
mutate(
delta_EI_prod = EI_B - EI_A, # + ⇒ diet_B is worse
abs_change = coalesce(total_pct_added, 0) +
coalesce(total_pct_removed, 0) +
coalesce(total_pct_increase, 0) +
coalesce(total_pct_decrease, 0),
pair_label = paste(diet_A, "→", diet_B)
) %>%
## keep only rows where we have both intensities
filter(!is.na(delta_EI_prod))
## Warning in left_join(., ei_core, by = c("B.Code", diet_A = "A.Level.Name"), : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## Warning in left_join(., ei_core, by = c("B.Code", diet_B = "A.Level.Name"), : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# keep only pairs with EI on both sides
library(dplyr)
library(stringr)
type_cols <- c("added_by_type",
"removed_by_type",
"increase_by_type",
"decrease_by_type")
ei_pairs <- ei_pairs %>%
filter(
!if_any(
all_of(type_cols),
~ !is.na(.) & # real value, not missing
str_detect(trimws(.), "^NA\\s*:") # starts with “NA:”
)
)
species_counts <- ei_pairs %>%
distinct(B.Code, Species = P.Product_A) %>% # 1 row per paper × species
count(Species, name = "n_papers")
###################################
## Practice-vs-EI plotting chunk
################################### # install.packages("patchwork") once
## ───────────────── 0 · Prep ────────────────────────────────────────────────
ei_pairs_plot <- ei_pairs %>%
# 0·a choose one species/product label
mutate(Product = coalesce(Product_B, Product_A,
P.Product_B, P.Product_A)) %>%
# 0·b compute yield delta before selecting
mutate(delta_Yield = Yield_kg_day_B - Yield_kg_day_A) %>%
select(abs_change,
delta_EI_prod,
delta_Yield,
practice_type,
Product) %>%
filter(!is.na(Product),
!is.na(delta_EI_prod))
## ───────────────── 1 · scatter, 2 · violin, 3 · trade-off ──────────────────
p1 <- ggplot(ei_pairs_plot,
aes(abs_change, delta_EI_prod, colour = practice_type)) +
geom_hline(yintercept = 0, linetype = 2, colour = "grey50") +
geom_point(alpha = .7, size = 2) +
geom_smooth(method = "lm", se = TRUE, colour = "black",
formula = y ~ x) +
scale_x_continuous("% of diet DM changed") +
scale_y_continuous("Δ EI (kg CH₄ / kg product)") +
facet_wrap(~ Product) +
theme_bw() +
theme(legend.position = "bottom")
p2 <- ggplot(ei_pairs_plot,
aes(practice_type, delta_EI_prod, fill = practice_type)) +
geom_violin(trim = FALSE, alpha = .35, colour = NA) +
geom_boxplot(width = .18, outlier.shape = NA) +
stat_summary(fun = median, geom = "point",
size = 2, colour = "black") +
geom_hline(yintercept = 0, linetype = 2, colour = "grey40") +
scale_y_continuous("Δ EI (kg CH₄ / kg product)") +
facet_wrap(~ Product, nrow = 1) +
labs(x = NULL, title = "Emission-intensity shift by practice type") +
theme_bw() +
theme(legend.position = "none")
p3 <- ggplot(ei_pairs_plot,
aes(delta_Yield, delta_EI_prod,
colour = practice_type,
size = abs_change)) +
geom_vline(xintercept = 0, linetype = 2, colour = "grey60") +
geom_hline(yintercept = 0, linetype = 2, colour = "grey60") +
geom_point(alpha = .7) +
scale_x_continuous("Δ Yield (kg animal⁻¹ d⁻¹)",
labels = scales::comma) +
scale_y_continuous("Δ EI (kg CH₄ / kg product)",
labels = scales::comma) +
scale_size(range = c(2,6), name = "% moved") +
facet_wrap(~ Product) +
labs(title = "Productivity vs. emission-intensity trade-offs") +
theme_minimal() +
theme(legend.position = "bottom")
## ───────────────── 2 · show or combine ──────────────────────────────────────
p1
p2
p3
# (p1 | p2 | p3) + plot_layout(guides = "collect") # uncomment for one panel
library(tidyverse)
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ scales::discard() masks purrr::discard()
## ✖ lubridate::duration() masks arrow::duration()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## ── 1 · tidy table -----------------------------------------------------------
plot_df <- ei_pairs %>%
filter(practice_type %in% c("addition", "substitution")) %>% # only add / sub
mutate(abs_change = total_pct_added + total_pct_removed +
total_pct_increase + total_pct_decrease) %>% # magnitude
separate_rows(added_by_type, sep = ";\\s*") %>% # explode list
mutate(
Ingredient_Type = str_remove(added_by_type, ":.*$") %>% str_trim(),
Ingredient_Type = case_when( # collapse names
str_detect(Ingredient_Type, regex("supplement", TRUE)) ~ "Supplement",
str_detect(Ingredient_Type, regex("concentrate", TRUE)) ~ "Concentrate",
TRUE ~ Ingredient_Type
)
) %>%
filter(Ingredient_Type != "", Ingredient_Type != "Entire Diet") %>%
mutate(Product = coalesce(!!!syms(names(.)[str_detect(names(.), "Product")]))) %>%
select(Product, practice_type, Ingredient_Type,
abs_change, delta_EI_prod)
## ── 2 · bubble panel ---------------------------------------------------------
ggplot(plot_df,
aes(abs_change, delta_EI_prod,
colour = Ingredient_Type,
shape = practice_type,
size = abs_change)) +
geom_hline(yintercept = 0, linetype = 2, colour = "grey50") +
geom_point(alpha = .75) +
scale_size(range = c(3,10), guide = "none") +
scale_x_continuous("% of diet DM changed") +
scale_y_continuous("Δ EI (kg CH₄ / kg product)",
labels = scales::comma) +
facet_wrap(~ Product) +
labs(title = "Effect of adding / substituting single ingredient categories",
colour = "Ingredient category",
shape = "Practice") +
theme_bw() +
theme(legend.position = "right")
library(tidyverse)
library(ggalt) # install.packages("ggalt") for geom_dumbbell()
## ── 1 · tidy substitutions table --------------------------------------------
subs_db <- ei_pairs %>%
filter(practice_type == "substitution") %>%
# explode the semicolon lists
separate_rows(removed_by_type, added_by_type, sep = ";\\s*") %>%
# trim & strip trailing numbers, then normalise categories
mutate(
removed = removed_by_type %>% str_remove(":.*$") %>% str_trim() %>% str_to_lower(),
added = added_by_type %>% str_remove(":.*$") %>% str_trim() %>% str_to_lower(),
removed = case_when(
str_detect(removed, "supplement") ~ "Supplement",
str_detect(removed, "concentrate") ~ "Concentrate",
TRUE ~ str_to_title(removed)
),
added = case_when(
str_detect(added, "supplement") ~ "Supplement",
str_detect(added, "concentrate") ~ "Concentrate",
TRUE ~ str_to_title(added)
)
) %>%
# drop blanks & “Entire Diet”
filter(removed != "", added != "",
removed != "Entire Diet", added != "Entire Diet") %>%
# pair label + product label
mutate(
pair_id = paste(removed, "→", added),
Product = coalesce(!!!syms(names(.)[str_detect(names(.), "Product")]))
)
## ── 2 · dumb-bell plot -------------------------------------------------------
subs_db <- subs_db %>% # reuse the table
mutate(effect = case_when(
delta_EI_prod <= -0.02 ~ "Lower EI",
delta_EI_prod >= 0.02 ~ "Higher EI",
TRUE ~ "Neutral"
))
library(tidyverse)
## subs_db already built & classified with “effect” ----
## left dot = original EI (grey)
## right dot = new EI (colour by effect)
ggplot(subs_db,
aes(y = reorder(pair_id, delta_EI_prod))) +
## segment
geom_segment(aes(x = 0, xend = delta_EI_prod),
colour = "grey70") +
## left (before) dot
geom_point(aes(x = 0),
colour = "grey60",
size = 2) +
## right (after) dot – coloured by effect
geom_point(aes(x = delta_EI_prod,
colour = effect),
size = 2) +
scale_colour_manual(values = c(
"Lower EI" = "#2b83ba", # blue = improvement
"Neutral" = "grey70",
"Higher EI" = "#d7191c" # red = worse
)) +
facet_wrap(~ Product, scales = "free_y") +
geom_vline(xintercept = 0, linetype = 2, colour = "grey50") +
labs(x = "Δ EI (kg CH₄ / kg product)",
y = NULL,
colour = "Effect",
title = "Direction & magnitude of each substitution") +
theme_minimal()
add intensity of the substitution. how are subtitution done ? removed and added werid concentrate thing. do it by species ? how many pooints ?
library(tidyverse)
library(scales) # for pretty breaks if you want
## ── 1 · tidy substitutions + magnitude --------------------------------------
subs_db <- ei_pairs %>%
filter(practice_type == "substitution") %>%
## magnitude of this reformulation
mutate(pct_moved = 0.5 * (total_pct_added + total_pct_removed + # counts each swap once
total_pct_increase + total_pct_decrease)
) %>%
separate_rows(removed_by_type, added_by_type, sep = ";\\s*") %>%
mutate(
removed = removed_by_type %>% str_remove(":.*$") %>% str_trim() %>% str_to_lower(),
added = added_by_type %>% str_remove(":.*$") %>% str_trim() %>% str_to_lower(),
removed = case_when(
str_detect(removed, "supplement") ~ "Supplement",
str_detect(removed, "concentrate") ~ "Concentrate",
TRUE ~ str_to_title(removed)),
added = case_when(
str_detect(added, "supplement") ~ "Supplement",
str_detect(added, "concentrate") ~ "Concentrate",
TRUE ~ str_to_title(added))
) %>%
filter(removed != "", added != "",
removed != "Entire Diet", added != "Entire Diet") %>%
mutate(pair_id = paste(removed, "→", added),
Product = coalesce(!!!syms(names(.)[str_detect(names(.), "Product")])),
effect = case_when(
delta_EI_prod <= -0.02 ~ "Lower EI",
delta_EI_prod >= 0.02 ~ "Higher EI",
TRUE ~ "Neutral"))
## ── 2 · dumb-bell with magnitude encoded ------------------------------------
ggplot(subs_db,
aes(y = reorder(pair_id, delta_EI_prod))) +
## segment thickness = % DM moved
geom_segment(aes(x = 0,
xend = delta_EI_prod,
size = pct_moved), # <- line width
colour = "grey70",
lineend = "round") +
## before-swap dot (always grey)
geom_point(aes(x = 0),
colour = "grey60",
size = 2) +
## after-swap dot: colour = direction, size = % moved
geom_point(aes(x = delta_EI_prod,
colour = effect,
size = pct_moved)) +
scale_colour_manual(values = c(
"Lower EI" = "#2b83ba",
"Neutral" = "grey70",
"Higher EI" = "#d7191c")) +
scale_size(range = c(0.5, 4), # tweak min/max stroke & dot sizes
breaks = pretty_breaks(4),
name = "% diet DM\nmoved") +
facet_wrap(~ Product, scales = "free_y") +
geom_vline(xintercept = 0, linetype = 2, colour = "grey50") +
labs(x = "Δ EI (kg CH₄ / kg product)",
y = NULL,
colour = "Effect",
title = "Direction, magnitude and intensity of each substitution") +
theme_minimal() +
theme(legend.position = "right")
ggplot(subs_db,
aes(x = delta_EI_prod,
y = pct_moved,
colour = effect, # blue / red / grey
size = pct_moved)) + # double-encode size
geom_vline(xintercept = 0, linetype = 2) +
geom_point(alpha = .75) +
scale_size(range = c(3,12), guide = "none") +
scale_y_continuous("% of diet DM substituted") +
scale_x_continuous("Δ EI (kg CH₄ / kg product)",
breaks = scales::pretty_breaks()) +
facet_wrap(~ Product) +
theme_bw()
heat_tbl <- subs_db %>%
group_by(removed, added, Product) %>%
summarise( mean_EI = mean(delta_EI_prod),
mean_pct = mean(pct_moved),
n = n(), .groups = "drop") %>%
filter(n >= 3)
# ggplot(heat_tbl,
# aes(removed, added, fill = mean_EI)) +
# geom_tile(colour = "white") +
# geom_text(aes(label = round(mean_pct,0)), size = 3) + # % printed on tile
# scale_fill_gradient2(low = "#2b83ba", mid = "white",
# high = "#d7191c", midpoint = 0,
# name = "Δ EI") +
# facet_wrap(~ Product) +
# labs(x = "Category removed", y = "Category added",
# title = "Average EI shift (colour) and % DM swapped (number)") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
na_GE_counts <- Gross_energy %>%
filter(DC.Variable == "GE") %>% # only GE rows
mutate(DC.Value = as.numeric(DC.Value)) %>% # blanks → NA
group_by(D.Item) %>%
summarise(
n_rows = n(), # total GE rows for this item
n_na = sum(is.na(DC.Value)), # how many of them are NA
all_GE_missing = n_na == n_rows, # TRUE ⇢ every GE row is NA
.groups = "drop"
)