第 8 章 Working with data frame

# Build one CSV URL per path component 104 to 108: the first three use
# "faraway_new.csv", the last two use "faraway1.csv".
list_farawaySchools <- paste(
  "https://stats.moe.gov.tw/files/school",
  104:108,
  rep(c("faraway_new.csv", "faraway1.csv"), times = c(3, 2)),
  sep = "/"
)
list_farawaySchools

For a given link:

library(readr)
# Download a single CSV into a data frame.
faraway3 <- read_csv(
  file = "https://stats.moe.gov.tw/files/school/108/faraway3.csv"
)

# Container for the exploration: the raw data frame plus an (initially
# empty) list that later sections fill with summaries.
data_explore <- list(dataframe = faraway3, summary = list())
library(dplyr)

8.1 pipe

f(x) # becomes
x %>% f()

f(x, ...) # becomes
x %>% f(...)

R 4.1.0+ has a built-in (native) pipe, |>:

f(x) # becomes
x |> f()

%___% is a user-defined special function of two arguments, say x and y. It can be used in two ways:

  • `%___%`(x, y)

  • x %___% y

# User-defined infix operator: hands the left operand to the function given
# on the right, so `x %--->% fn` evaluates fn(x).
`%--->%` <- function(x, fn) fn(x)

# Both lines evaluate exp(5); the second one spells it with the new operator.
exp(5)
5 %--->% exp

nested call

f(g(x)) # becomes
# Without the braces, magrittr would also insert x as f's first argument,
# i.e. x %>% f(g(.)) means f(x, g(x)).
x %>% {f(g(.))} # where {} and . are necessary.

8.2 Summarise

  • check class

  • check NA

# Summarise one column: its mean and its number of missing values.
# The NA counter is an anonymous function that is defined and then called
# immediately on the column.
data_explore$summary$原住民學生比率 <- faraway3 %>%
  summarise(
    平均 = mean(原住民學生比率),
    缺失資料 = (function(x) sum(is.na(x)))(原住民學生比率)
  )
  • anonymous function

  • immediate function call.

All dplyr functions that operate on column variables let you refer to a column by its bare name, with no need to write dataframe$column_variable, because they take the same approach as with(data, {...}): anything inside {...} is looked up in data before .GlobalEnv:

# Base R equivalent of the summarise() call: inside with(), column names are
# resolved against faraway3, so no faraway3$ prefix is needed.
x <- with(
  faraway3,
  {
    data.frame(
      平均 = mean(原住民學生比率),
      缺失資料 = (function(x) sum(is.na(x)))(原住民學生比率)
    )
  }
)

8.3 Mutate

  • fix class
# Convert the 縣市名稱 column to a factor and store the result back in
# faraway3.
faraway3 <- faraway3 %>%
  mutate(縣市名稱 = factor(縣市名稱))

The magrittr package includes %<>%, which assigns the result of the RHS back to the LHS object.

library(magrittr)
# %<>% pipes faraway3 into mutate() and assigns the result back to faraway3.
faraway3 %<>% mutate(縣市名稱 = factor(縣市名稱))

8.4 Filter

# Keep only the rows whose 縣市名稱 equals "[14]臺東縣", then report the mean
# of 原住民學生比率 and the number of remaining rows.
faraway3 %>%
  filter(縣市名稱 == "[14]臺東縣") %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  )

8.5 Group

# Prepare a slot for grouped summaries, then store one row per 縣市名稱 with
# the group mean of 原住民學生比率 and the group's row count.
data_explore$summary$by <- list()
data_explore$summary$by$縣市名稱 <- faraway3 %>%
  group_by(縣市名稱) %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  ) %>%
  ungroup()

All operations following group_by are computed group-wise until ungroup is called.

8.6 Across

# Apply factor() to two ranges of columns at once and keep the result in xx.
xx <- faraway3 %>%
  mutate(
    across(
      .cols = c(縣市名稱:學生等級, "公/私立":"地區屬性"),
      .fns = factor
    )
  )

# Take the mean of every numeric column.
faraway3 %>%
  summarise(across(.cols = where(is.numeric), .fns = mean))

8.7 rowwise + c_across

# Simulated homework scores: three columns of 50 values each, drawn with
# replacement from 1 to 10.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, which would silently change the sampling.
grades <- data.frame(
  hw1 = sample(1:10, 50, replace = TRUE),
  hw2 = sample(1:10, 50, replace = TRUE),
  hw3 = sample(1:10, 50, replace = TRUE)
)

# Row totals computed directly from the three named columns.
grades %>%
  mutate(total = hw1 + hw2 + hw3)

# Row totals via rowwise(): sum() is evaluated once per row, and c_across()
# collects every column of the current row into one vector.
grades %>%
  rowwise() %>%
  mutate(total = sum(c_across(cols = everything())))

8.8 tidyr::pivot

# Reshape the per-縣市名稱 summary.
data_explore$table <- list()

# Long form: columns 2 and 3 are stacked into a name column ("項目") and a
# value column ("值").
data_explore$table$long <- data_explore$summary$by$縣市名稱 %>%
  tidyr::pivot_longer(
    cols = 2:3,
    names_to = "項目",
    values_to = "值"
  )

# Wide form: each distinct 縣市名稱 becomes its own column holding "值".
data_explore$table$wide <- data_explore$table$long %>%
  tidyr::pivot_wider(
    names_from = "縣市名稱",
    values_from = "值"
  )

[