library(pacman)
p_load(agridat, dplyr, tidyr, stringr, forcats, skimr, lubridate)Transforming Ag data in R II
1 More packages for data manipulation…🚀
This lesson builds on our previous session by introducing more advanced data wrangling techniques using tidyr, stringr, and forcats. We will explore how to manipulate and transform data for efficient analysis. Additionally, we introduce lubridate for handling dates effectively.
1.1 Required packages for today
2 Advanced dplyr Functions

2.1 Aggregation with group_by() and summarize()
data_corn <- agridat::lasrosas.corn
summary_data <- data_corn %>%
group_by(topo, year) %>%
summarize(mean_yield = mean(yield, na.rm = TRUE), .groups = "drop")
summary_data# A tibble: 8 × 3
topo year mean_yield
<fct> <int> <dbl>
1 E 1999 64.8
2 E 2001 92.7
3 HT 1999 53.4
4 HT 2001 44.7
5 LO 1999 71.2
6 LO 2001 99.9
7 W 1999 66.0
8 W 2001 67.7
2.2 Applying Functions to Multiple Columns using across()
data_across <- data_corn %>%
mutate(across(c(lat, long), ~ round(.x, digits=1), .names = "rounded_{.col}")) # Rounding values to 1 decimal place for better readability
head(data_across) year lat long yield nitro topo bv rep nf rounded_lat
1 1999 -33.05113 -63.84886 72.14 131.5 W 162.60 R1 N5 -33.1
2 1999 -33.05115 -63.84879 73.79 131.5 W 170.49 R1 N5 -33.1
3 1999 -33.05116 -63.84872 77.25 131.5 W 168.39 R1 N5 -33.1
4 1999 -33.05117 -63.84865 76.35 131.5 W 176.68 R1 N5 -33.1
5 1999 -33.05118 -63.84858 75.55 131.5 W 171.46 R1 N5 -33.1
6 1999 -33.05120 -63.84851 70.24 131.5 W 170.56 R1 N5 -33.1
rounded_long
1 -63.8
2 -63.8
3 -63.8
4 -63.8
5 -63.8
6 -63.8
2.3 Creating Conditional Columns with case_when()
data_casewhen <- data_corn %>%
mutate(yield_category = case_when(
yield > 10 ~ "High",
yield > 5 ~ "Medium",
TRUE ~ "Low"
))
head(data_casewhen) year lat long yield nitro topo bv rep nf yield_category
1 1999 -33.05113 -63.84886 72.14 131.5 W 162.60 R1 N5 High
2 1999 -33.05115 -63.84879 73.79 131.5 W 170.49 R1 N5 High
3 1999 -33.05116 -63.84872 77.25 131.5 W 168.39 R1 N5 High
4 1999 -33.05117 -63.84865 76.35 131.5 W 176.68 R1 N5 High
5 1999 -33.05118 -63.84858 75.55 131.5 W 171.46 R1 N5 High
6 1999 -33.05120 -63.84851 70.24 131.5 W 170.56 R1 N5 High
2.4 Slicing data with slice():
# Selecting the first 3 rows
first_rows <- data_corn %>% slice(1:3)
head(first_rows) year lat long yield nitro topo bv rep nf
1 1999 -33.05113 -63.84886 72.14 131.5 W 162.60 R1 N5
2 1999 -33.05115 -63.84879 73.79 131.5 W 170.49 R1 N5
3 1999 -33.05116 -63.84872 77.25 131.5 W 168.39 R1 N5
# Selecting the last 3 rows
last_rows <- data_corn %>% slice_tail(n = 3)
head(last_rows) year lat long yield nitro topo bv rep nf
1 2001 -33.05110 -63.84189 92.33 39 LO 166.75 R3 N1
2 2001 -33.05112 -63.84182 88.98 39 LO 163.59 R3 N1
3 2001 -33.05115 -63.84175 85.74 39 LO 163.48 R3 N1
# Selecting 3 random rows
random_rows <- data_corn %>% slice_sample(n = 3)
head(random_rows) year lat long yield nitro topo bv rep nf
1 2001 -33.05185 -63.84193 99.83 124.6 LO 167.20 R2 N5
2 1999 -33.05139 -63.84590 52.24 53.0 HT 184.92 R1 N2
3 2001 -33.05180 -63.84207 97.56 124.6 LO 169.84 R2 N5
# Selecting every 2nd row
every_second_row <- data_corn %>% slice(seq(1, n(), by = 2))
head(every_second_row) year lat long yield nitro topo bv rep nf
1 1999 -33.05113 -63.84886 72.14 131.5 W 162.60 R1 N5
2 1999 -33.05116 -63.84872 77.25 131.5 W 168.39 R1 N5
3 1999 -33.05118 -63.84858 75.55 131.5 W 171.46 R1 N5
4 1999 -33.05121 -63.84844 76.17 131.5 W 172.94 R1 N5
5 1999 -33.05123 -63.84830 69.77 131.5 W 171.88 R1 N5
6 1999 -33.05126 -63.84816 71.05 131.5 W 173.02 R1 N5
2.5 Working with Time-Series Data: lead() and lag()
data_lag <- data_corn %>%
arrange(year, topo) %>%
mutate(yield_change = yield - lag(yield))
head(data_lag) year lat long yield nitro topo bv rep nf yield_change
1 1999 -33.05174 -63.84532 59.94 131.5 E 182.12 R1 N5 NA
2 1999 -33.05175 -63.84525 58.96 131.5 E 182.57 R1 N5 -0.98
3 1999 -33.05176 -63.84518 61.77 131.5 E 178.07 R1 N5 2.81
4 1999 -33.05178 -63.84511 66.41 131.5 E 177.83 R1 N5 4.64
5 1999 -33.05179 -63.84504 66.06 131.5 E 176.17 R1 N5 -0.35
6 1999 -33.05180 -63.84497 62.13 131.5 E 176.56 R1 N5 -3.93
3 Data Tidying with tidyr

tidyr helps reshape data into a tidy format. Some key functions:
3.1 Gathering and Spreading Data
# Convert from wide to long format using pivot_longer
long_data <- data_corn %>%
pivot_longer(cols = c(yield, nitro),
names_to = "measurement", # name of the column with description
values_to = "value") # name of the column with values
head(long_data)# A tibble: 6 × 9
year lat long topo bv rep nf measurement value
<int> <dbl> <dbl> <fct> <dbl> <fct> <fct> <chr> <dbl>
1 1999 -33.1 -63.8 W 163. R1 N5 yield 72.1
2 1999 -33.1 -63.8 W 163. R1 N5 nitro 132.
3 1999 -33.1 -63.8 W 170. R1 N5 yield 73.8
4 1999 -33.1 -63.8 W 170. R1 N5 nitro 132.
5 1999 -33.1 -63.8 W 168. R1 N5 yield 77.2
6 1999 -33.1 -63.8 W 168. R1 N5 nitro 132.
# Convert back from long to wide format using pivot_wider
wide_data <- long_data %>%
pivot_wider(names_from = measurement,
values_from = value)
head(wide_data)# A tibble: 6 × 9
year lat long topo bv rep nf yield nitro
<int> <dbl> <dbl> <fct> <dbl> <fct> <fct> <dbl> <dbl>
1 1999 -33.1 -63.8 W 163. R1 N5 72.1 132.
2 1999 -33.1 -63.8 W 170. R1 N5 73.8 132.
3 1999 -33.1 -63.8 W 168. R1 N5 77.2 132.
4 1999 -33.1 -63.8 W 177. R1 N5 76.4 132.
5 1999 -33.1 -63.8 W 171. R1 N5 75.6 132.
6 1999 -33.1 -63.8 W 171. R1 N5 70.2 132.
3.2 Separating and Uniting Columns
# Example dataset with a combined column
example_data <- data_corn %>%
mutate(topo_year = paste(topo, year, sep = "_"))
# Splitting 'topo_year' into two columns
separated_data <- example_data %>%
separate(topo_year, into = c("topo", "year"), sep = "_")
head(separated_data) lat long yield nitro bv rep nf topo year
1 -33.05113 -63.84886 72.14 131.5 162.60 R1 N5 W 1999
2 -33.05115 -63.84879 73.79 131.5 170.49 R1 N5 W 1999
3 -33.05116 -63.84872 77.25 131.5 168.39 R1 N5 W 1999
4 -33.05117 -63.84865 76.35 131.5 176.68 R1 N5 W 1999
5 -33.05118 -63.84858 75.55 131.5 171.46 R1 N5 W 1999
6 -33.05120 -63.84851 70.24 131.5 170.56 R1 N5 W 1999
# Combining 'topo' and 'year' back into a single column
united_data <- separated_data %>%
unite("topo_year", topo, year, sep = "-")
head(united_data) lat long yield nitro bv rep nf topo_year
1 -33.05113 -63.84886 72.14 131.5 162.60 R1 N5 W-1999
2 -33.05115 -63.84879 73.79 131.5 170.49 R1 N5 W-1999
3 -33.05116 -63.84872 77.25 131.5 168.39 R1 N5 W-1999
4 -33.05117 -63.84865 76.35 131.5 176.68 R1 N5 W-1999
5 -33.05118 -63.84858 75.55 131.5 171.46 R1 N5 W-1999
6 -33.05120 -63.84851 70.24 131.5 170.56 R1 N5 W-1999
3.3 Nesting and Unnesting Data
nested_data <- data_corn %>%
group_by(topo) %>%
nest()
head(nested_data)# A tibble: 4 × 2
# Groups: topo [4]
topo data
<fct> <list>
1 W <tibble [1,043 × 8]>
2 HT <tibble [785 × 8]>
3 E <tibble [730 × 8]>
4 LO <tibble [885 × 8]>
unnested_data <- nested_data %>%
unnest(cols = c(data))
head(unnested_data)# A tibble: 6 × 9
# Groups: topo [1]
topo year lat long yield nitro bv rep nf
<fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct>
1 W 1999 -33.1 -63.8 72.1 132. 163. R1 N5
2 W 1999 -33.1 -63.8 73.8 132. 170. R1 N5
3 W 1999 -33.1 -63.8 77.2 132. 168. R1 N5
4 W 1999 -33.1 -63.8 76.4 132. 177. R1 N5
5 W 1999 -33.1 -63.8 75.6 132. 171. R1 N5
6 W 1999 -33.1 -63.8 70.2 132. 171. R1 N5
4 String Manipulation with stringr

The stringr package provides a consistent way to work with character strings.
4.1 Detecting and Extracting Strings
names <- c("Wheat Field", "Corn Field", "Soybean Farm")
str_detect(names, "Field") # Check if 'Field' is present[1] TRUE TRUE FALSE
str_subset(names, "Corn") # Extract values containing 'Corn'[1] "Corn Field"
4.2 Modifying Strings
names <- str_replace(names, "Field", "Plot")
names[1] "Wheat Plot" "Corn Plot" "Soybean Farm"
capitalized_names <- str_to_title(names)
head(capitalized_names)[1] "Wheat Plot" "Corn Plot" "Soybean Farm"
data_clean <- data_corn %>%
mutate(topo_clean = str_replace_all(topo, "[^a-zA-Z0-9]", "_"))
head(data_clean) year lat long yield nitro topo bv rep nf topo_clean
1 1999 -33.05113 -63.84886 72.14 131.5 W 162.60 R1 N5 W
2 1999 -33.05115 -63.84879 73.79 131.5 W 170.49 R1 N5 W
3 1999 -33.05116 -63.84872 77.25 131.5 W 168.39 R1 N5 W
4 1999 -33.05117 -63.84865 76.35 131.5 W 176.68 R1 N5 W
5 1999 -33.05118 -63.84858 75.55 131.5 W 171.46 R1 N5 W
6 1999 -33.05120 -63.84851 70.24 131.5 W 170.56 R1 N5 W
4.3 Splitting Strings
words <- "Wheat,Corn,Soybean"
split_words <- str_split(words, ",")
head(split_words)[[1]]
[1] "Wheat" "Corn" "Soybean"
5 Factor Handling with forcats
forcats provides tools to manipulate categorical data effectively.
5.1 Reordering Factors
library(forcats)
crops <- factor(c("soybean", "corn", "wheat"), levels = c("wheat", "corn", "soybean"))
crops <- fct_relevel(crops, "corn") # Moves 'corn' to first position
head(crops)[1] soybean corn wheat
Levels: corn wheat soybean
5.2 Lump Rare Categories Together
set.seed(123)
data <- data.frame(crop = sample(c("corn", "soybean", "wheat", "barley", "oats"), 20, replace = TRUE))
# Using mutate() to lump rare categories together
factor_data <- data %>%
mutate(crop_lumped = fct_lump_n(crop, n = 3)) # Keep top 3 categories, lump others into 'Other'
factor_data crop crop_lumped
1 wheat wheat
2 wheat wheat
3 soybean soybean
4 soybean soybean
5 wheat wheat
6 oats Other
7 barley Other
8 corn corn
9 soybean soybean
10 wheat wheat
11 oats Other
12 wheat wheat
13 wheat wheat
14 corn corn
15 barley Other
16 corn corn
17 corn corn
18 oats Other
19 wheat wheat
20 soybean soybean
6 Date Handling with lubridate

The lubridate package simplifies working with dates and times in R.
6.0.1 Parsing Dates
dates <- c("2023-06-15", "2024-01-20", "2025-07-04")
parsed_dates <- ymd(dates)
parsed_dates[1] "2023-06-15" "2024-01-20" "2025-07-04"
6.1 Extracting Date Components
year(parsed_dates)[1] 2023 2024 2025
month(parsed_dates)[1] 6 1 7
day(parsed_dates)[1] 15 20 4
6.2 Parsing and Extracting Date Components
dates_corn <- data_corn %>%
mutate(date = ymd(paste(year, "01", "01", sep = "-")))
head(dates_corn) year lat long yield nitro topo bv rep nf date
1 1999 -33.05113 -63.84886 72.14 131.5 W 162.60 R1 N5 1999-01-01
2 1999 -33.05115 -63.84879 73.79 131.5 W 170.49 R1 N5 1999-01-01
3 1999 -33.05116 -63.84872 77.25 131.5 W 168.39 R1 N5 1999-01-01
4 1999 -33.05117 -63.84865 76.35 131.5 W 176.68 R1 N5 1999-01-01
5 1999 -33.05118 -63.84858 75.55 131.5 W 171.46 R1 N5 1999-01-01
6 1999 -33.05120 -63.84851 70.24 131.5 W 170.56 R1 N5 1999-01-01
7 Summary
Today, we explored: - tidyr for reshaping and tidying data, including nest() and unnest(). - stringr for working with text data. - forcats for handling categorical variables. - lubridate for working with date data.
These functions will help you work efficiently with real-world agricultural data. Next session, we will integrate these skills into a full data processing workflow!