第 8 章 Working with data frames
# Build one CSV download URL per year, 104 through 108.
# paste() recycles the base URL across the five years; the file name is
# "faraway_new.csv" for the first three years and "faraway1.csv" for the
# last two. Despite its name, the result is a character vector, not a list.
list_farawaySchools <- paste(
  "https://stats.moe.gov.tw/files/school",
  104:108,
  rep(c("faraway_new.csv", "faraway1.csv"), times = c(3, 2)),
  sep = "/"
)
list_farawaySchools # print the generated URLs

# For a given link:
library(readr)
library(dplyr)

# Download the CSV and keep it in `faraway3`.
faraway3 <- read_csv("https://stats.moe.gov.tw/files/school/108/faraway3.csv")

# Container for the exploration: the data frame itself plus an (initially
# empty) list that later code fills with summaries.
data_explore <- list(
  dataframe = faraway3,
  summary = list()
)

# 8.1 pipe ----
# Schematic notation only: `f`, `x`, `y` and `...` are placeholders, so the
# lines below illustrate the rewrite rules and are not meant to be executed.
f(x) # becomes
x %>% f()
f(x, ...) # becomes
x %>% f(...)

# R 4.1.0+ has a built-in pipe, |>
f(x) # becomes
x |> f()

# %___% is a user-defined special function of two arguments, say x and y.
# It can be used in two ways:
`%___%`(x, y) # prefix form: the operator name must be back-quoted
x %___% y     # infix form: no back-quotes
# `%--->%`: a user-defined infix operator that hands its left operand to the
# function on its right, i.e. `x %--->% fn` evaluates `fn(x)`.
#
# x:  the value to pass along.
# fn: a function that accepts `x` as its only argument.
# Returns whatever `fn(x)` returns.
`%--->%` <- function(x, fn) {
  fn(x)
}

# The two calls below compute the same value.
exp(5)
5 %--->% exp

# nested call
# Rewriting a nested call with the pipe (schematic: f, g, x are placeholders).
f(g(x)) # becomes
x %>% {f(g(.))} # where {} and . are necessary.
# 8.2 Summarise
check class
check NA
# Summarise a single column into two values: its mean and its number of
# missing entries. The NA count uses an anonymous function that is called
# immediately on the column.
# mean() is called without na.rm, so the mean is NA whenever the column
# contains missing values.
data_explore$summary$原住民學生比率 <- faraway3 %>%
  summarise(
    平均 = mean(原住民學生比率),
    缺失資料 = (function(x) sum(is.na(x)))(原住民學生比率)
  )

# anonymous function
immediate function call.
All dplyr functions let you refer to column variables directly, with no need to write dataframe$column_variable, because they use the same approach as with(data, {...}): anything inside {...} is looked up within data before .GlobalEnv:
# with() evaluates the braced expression with the columns of `faraway3`
# visible by name, so the columns can be used without the `faraway3$` prefix.
# The result is a one-row data.frame holding the mean and the NA count.
x <- with(
  faraway3,
  {
    data.frame(
      平均 = mean(原住民學生比率),
      缺失資料 = (function(x) sum(is.na(x)))(原住民學生比率)
    )
  }
)

# 8.3 Mutate ----
- fix class
 
# Convert the column to a factor and store the result back in `faraway3`.
faraway3 <- faraway3 %>%
  mutate(
    縣市名稱 = factor(縣市名稱)
  )

# Package magrittr includes %<>%, which assigns the value of the RHS back to
# the LHS object.
library(magrittr)

# %<>% pipes `faraway3` into mutate() and assigns the result back to
# `faraway3` in one step.
faraway3 %<>%
  mutate(
    縣市名稱 = factor(縣市名稱)
  )

# 8.4 Filter ----
# Keep only the rows that match the given value, then report the mean of one
# column and the number of remaining rows (n()).
faraway3 %>%
  filter(縣市名稱 == "[14]臺東縣") %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  )

# 8.5 Group ----
# Holder for the group-wise summaries.
data_explore$summary$by <- list()

# Same two statistics as above, computed once per group; ungroup() drops the
# grouping before the result is stored.
data_explore$summary$by$縣市名稱 <- faraway3 %>%
  group_by(縣市名稱) %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  ) %>%
  ungroup()

# Every operation that follows group_by() is computed group-wise until
# ungroup() is called.
8.6 Across
# across() inside mutate(): apply factor() to every selected column and keep
# the result in `xx`.
xx <- faraway3 %>%
  mutate(
    across(
      .cols = c(縣市名稱:學生等級, "公/私立":"地區屬性"),
      .fns = factor
    )
  )

# across() inside summarise(): take the mean of every numeric column.
faraway3 %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = mean
    )
  )

# 8.7 rowwise + c_across ----
# Example data: 50 rows of three homework scores, each drawn from 1..10 with
# replacement. TRUE is spelled out because T is an ordinary variable that can
# be reassigned.
grades <- data.frame(
  hw1 = sample(1:10, 50, replace = TRUE),
  hw2 = sample(1:10, 50, replace = TRUE),
  hw3 = sample(1:10, 50, replace = TRUE)
)

# Row total written out column by column.
grades %>%
  mutate(
    total = hw1 + hw2 + hw3
  )

# Same total without naming the columns: rowwise() makes mutate() work one
# row at a time and c_across() collects the selected columns of that row.
grades %>%
  rowwise() %>%
  mutate(
    total = sum(c_across(cols = everything()))
  )

# 8.8 tidyr::pivot ----
# Holder for the reshaped tables.
data_explore$table <- list()

# Long format: columns 2 and 3 of the per-group summary are stacked into a
# name column ("項目") and a value column ("值").
data_explore$table$long <- data_explore$summary$by$縣市名稱 %>% #names()
  tidyr::pivot_longer(
    cols = 2:3,
    names_to = "項目",
    values_to = "值"
  )

data_explore$table$long %>% #names()
  tidyr::pivot_wider(
    names_from = "縣市名稱",
    values_from = "值"
  ) -> data_explore$table$wide[