...
| Code Block | ||||
|---|---|---|---|---|
| ||||
# Load the walrus recordings. The file has no header row, so the three
# columns are named explicitly.
dataf <- read.table("walrus_sounds.tsv", header = FALSE, sep = "\t")
colnames(dataf) <- c("name", "vocalization", "min_sec")

# min_sec holds "minutes:seconds" strings; convert them to a numeric number
# of seconds (first piece * 60 + second piece).
sec1 <- as.numeric(str_split_i(dataf$min_sec, ":", 1)) * 60
sec2 <- as.numeric(str_split_i(dataf$min_sec, ":", 2))
dataf$seconds <- sec1 + sec2
dataf

# --- Base R ---------------------------------------------------------------
# Mean duration for every name / vocalization combination, then keep only
# the rows for Jocko and extract the means as a plain vector.
mean_by_group_baseR <- aggregate(
  seconds ~ name + vocalization,
  data = dataf,
  FUN = mean
)
mean_by_group_baseR_jocko <-
  mean_by_group_baseR[mean_by_group_baseR$name == "Jocko", ]
jocko_seconds <- mean_by_group_baseR_jocko$seconds

# --- dplyr ----------------------------------------------------------------
# Same summary as a pipe. dataf already carries the `seconds` column, so it
# is used directly instead of being recomputed from the global sec1/sec2
# vectors. `.groups = "drop"` returns an ungrouped result and avoids the
# regrouping message; as.data.frame() converts the tibble to a data frame.
mean_by_group_dplyr_jocko <- dataf %>%
  group_by(name, vocalization) %>%
  summarize(Mean_seconds = mean(seconds), .groups = "drop") %>%
  filter(name == "Jocko") %>%
  as.data.frame()

# If you want to extract a column into a vector, use pull().
# Note: this reassigns jocko_seconds, replacing the base R result above.
jocko_seconds <- mean_by_group_dplyr_jocko %>% pull(Mean_seconds)
# option, left merge
|
| Note |
|---|
We sent the same dataframe through a series of base R manipulations and through a dplyr pipe expression. The base R steps produce a dataframe, whereas the dplyr verbs produce a tibble. You should know at this point that a tibble is not the same as a dataframe: it prints differently, and some base R functions that expect a plain dataframe may not work correctly on it. Use as.data.frame(tibble) to recover a plain dataframe. |
...
| Code Block | ||||
|---|---|---|---|---|
| ||||
# Build a long-format copy of mtcars. The car names are moved out of the row
# names into a `model` column first; every column other than `mpg` and
# `model` is then stacked into a name/value pair of columns.
mtcars_long <- pivot_longer(
  rownames_to_column(mtcars, var = "model"),
  cols = -c(mpg, model),   # everything except the two identifier columns
  names_to = "variable",   # receives the former column names
  values_to = "value"      # receives the corresponding cell values
)

# Plot each stacked variable against mpg in its own panel. scales = "free_y"
# gives every panel an independent y axis.
ggplot(
  data = mtcars_long,
  mapping = aes(x = mpg, y = value, colour = variable)
) +
  geom_point() +
  facet_wrap(~ variable, scales = "free_y") +
  labs(
    x = "Miles Per Gallon (mpg)",
    y = "Value",
    title = "mpg vs. variables"
  ) +
  theme_minimal()
|
More on joining
Full (outer):
...
Inner:
...
Left:
...
Right:
...
| Code Block | ||||
|---|---|---|---|---|
| ||||
# Load the walrus recordings. The file has no header row, so the three
# columns are named explicitly.
dataf <- read.table("walrus_sounds.tsv", header = FALSE, sep = "\t")
colnames(dataf) <- c("name", "vocalization", "min_sec")

# min_sec holds "minutes:seconds" strings; convert them to a numeric number
# of seconds (first piece * 60 + second piece).
sec1 <- as.numeric(str_split_i(dataf$min_sec, ":", 1)) * 60
sec2 <- as.numeric(str_split_i(dataf$min_sec, ":", 2))
dataf$seconds <- sec1 + sec2
dataf

# Base R: mean duration for every name / vocalization combination.
result <- aggregate(seconds ~ name + vocalization, data = dataf, FUN = mean)

# Base R: pivot the summary to wide format. The column names must be the
# ones assigned above (name, vocalization, seconds), not the V1/V2 defaults
# that read.table() produced before the rename.
df_wide <- reshape(result,
  idvar = "name",           # Column(s) that identify unique observations
  timevar = "vocalization", # Column whose values will become new column names
  v.names = "seconds",      # Column(s) whose values will populate the new columns
  direction = "wide"        # Specifies the reshaping direction
)

# dplyr: the same grouped mean. `.groups = "drop"` returns an ungrouped
# tibble so later steps are not affected by leftover grouping.
mean_by_group_dplyr <- dataf %>%
  group_by(name, vocalization) %>%
  summarize(Mean_Value = mean(seconds), .groups = "drop")

# Instead, use tidyr to pivot wide.
library(tidyr)
df_wide_tidy <- mean_by_group_dplyr %>%
  pivot_wider(names_from = vocalization, values_from = Mean_Value)
as.data.frame(df_wide_tidy)

# Can combine them, but must be careful what type of objects you're working
# with. For example:
# The following fails when run directly on mean_by_group_dplyr, because the
# dplyr verbs output a 'tibble'. Convert to a data.frame for it to work
# properly in base R -- as.data.frame(); or conversely, as_tibble() to go
# from a data.frame to a tibble.
df_wide <- reshape(as.data.frame(mean_by_group_dplyr),
  idvar = "name",           # Column(s) that identify unique observations
  timevar = "vocalization", # Column whose values will become new column names
  v.names = "Mean_Value",   # Column(s) whose values will populate the new columns
  direction = "wide"        # Specifies the reshaping direction
) |