Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
breakoutModewide
breakoutWidth760
# Mean vocalization length per walrus and vocalization type, computed twice:
# once with base R and once with a dplyr pipe.
# Needs stringr (str_split_i) and dplyr (%>%, mutate, group_by, summarize,
# filter, pull) to be attached before running.

# The file has no header row, so the three columns are named by hand.
dataf <- read.table("walrus_sounds.tsv", header = FALSE, sep = "\t")
colnames(dataf) <- c("name", "vocalization", "min_sec")

# Split min_sec on ":"; the first piece is multiplied by 60 and the second
# piece is added unchanged, giving one numeric total per row.
sec1 <- as.numeric(str_split_i(dataf$min_sec, ":", 1)) * 60
sec2 <- as.numeric(str_split_i(dataf$min_sec, ":", 2))

dataf$seconds <- sec1 + sec2
dataf

# Base R: aggregate() returns a plain data.frame with one row per
# name/vocalization pair; keep only Jocko's rows and take the means.
mean_by_group_baseR <- aggregate(
  seconds ~ name + vocalization,
  data = dataf,
  FUN = mean
)
mean_by_group_baseR_jocko <-
  mean_by_group_baseR[mean_by_group_baseR$name == "Jocko", ]
jocko_seconds <- mean_by_group_baseR_jocko$seconds


# dplyr: the same calculation as a pipe. mutate() recomputes the same
# seconds column the dplyr way; .groups = "drop" returns an ungrouped result
# (and silences the regrouping message); as.data.frame() converts the tibble
# back to a plain data.frame.
mean_by_group_dplyr_jocko <- dataf %>%
  mutate(seconds = sec1 + sec2) %>%
  group_by(name, vocalization) %>%
  summarize(Mean_seconds = mean(seconds), .groups = "drop") %>%
  filter(name == "Jocko") %>%
  as.data.frame()

# if you want to extract a column into a vector, use pull()
jocko_seconds <- mean_by_group_dplyr_jocko %>% pull(Mean_seconds)

#option, left merge
Note

We sent the same data frame through a series of base R manipulations and through a dplyr pipe expression. What we ended up with were a data frame and a tibble. You should know at this point that a tibble is not the same thing as a data frame. A tibble will not always behave like a data frame in base R functions, so use as.data.frame(tibble) to recover a plain data frame.

...

Code Block
breakoutModewide
breakoutWidth760
# Reshape mtcars to long format: one row per (car model, variable) pair,
# keeping mpg alongside so every other variable can be plotted against it.
mtcars_long <- mtcars %>%
  # Move the row names (car models) into a regular column called "model".
  rownames_to_column(var = "model") %>%
  pivot_longer(
    # Pivot every column except the two identifier columns.
    cols = -c(mpg, model),
    # Former column names go here ...
    names_to = "variable",
    # ... and their values go here.
    values_to = "value"
  )

# One scatter panel per variable, each with its own y scale.
ggplot(mtcars_long, aes(x = mpg, y = value, color = variable)) +
  geom_point() +
  facet_wrap(~ variable, scales = "free_y") +
  labs(
    title = "mpg vs. variables",
    x = "Miles Per Gallon (mpg)",
    y = "Value"
  ) +
  theme_minimal()

More on joining

Full (outer):

...

Inner:

...

Left:

...

Right:

...

Code Block
breakoutModewide
breakoutWidth760
# Long-to-wide reshaping of the per-group mean vocalization lengths, done
# with base R reshape() and with tidyr pivot_wider().
# Needs stringr (str_split_i) and dplyr (%>%, group_by, summarize) to be
# attached before running; tidyr is loaded here for pivot_wider().
library(tidyr)

# The file has no header row, so the three columns are named by hand.
dataf <- read.table("walrus_sounds.tsv", header = FALSE, sep = "\t")
colnames(dataf) <- c("name", "vocalization", "min_sec")
# Split min_sec on ":"; the first piece is multiplied by 60 and the second
# piece is added unchanged.
sec1 <- as.numeric(str_split_i(dataf$min_sec, ":", 1)) * 60
sec2 <- as.numeric(str_split_i(dataf$min_sec, ":", 2))
dataf$seconds <- sec1 + sec2
dataf

# Base R: one row per name/vocalization pair with the mean of seconds.
result <- aggregate(seconds ~ name + vocalization, data = dataf, FUN = mean)

# The columns were renamed above, so reshape() must refer to
# name/vocalization/seconds, not the read.table() defaults V1/V2.
df_wide <- reshape(result,
  idvar = "name",            # Column(s) that identify unique observations
  timevar = "vocalization",  # Column whose values will become new column names
  v.names = "seconds",       # Column(s) whose values will populate the new columns
  direction = "wide"         # Specifies the reshaping direction
)

# dplyr: the same per-group means; the result is a tibble.
mean_by_group_dplyr <- dataf %>%
  group_by(name, vocalization) %>%
  summarize(Mean_Value = mean(seconds), .groups = "drop")

# instead, use tidyr's pivot_wider() to pivot wide
df_wide_tidy <- mean_by_group_dplyr %>%
  pivot_wider(names_from = vocalization, values_from = Mean_Value)
as.data.frame(df_wide_tidy)

# Can combine them: but must be careful what type of objects you're working with.
# For example:
# the following fails when run on mean_by_group_dplyr directly, because the
# group_by()/summarize() pipeline outputs a 'tibble'. Must convert to
# data.frame for it to work properly in base R -- as.data.frame(), or
# conversely, as_tibble()

df_wide <- reshape(as.data.frame(mean_by_group_dplyr),
  idvar = "name",            # Column(s) that identify unique observations
  timevar = "vocalization",  # Column whose values will become new column names
  v.names = "Mean_Value",    # Column(s) whose values will populate the new columns
  direction = "wide"         # Specifies the reshaping direction
)