第 8 章 Working with data frame
# Build one CSV link per directory number 104-108 under the base URL.
# The first three links end in "faraway_new.csv" and the last two in
# "faraway1.csv"; paste() recycles the base URL across all five pairs.
list_farawaySchools <- paste(
  "https://stats.moe.gov.tw/files/school",
  104:108,
  rep(c("faraway_new.csv", "faraway1.csv"), times = c(3, 2)),
  sep = "/"
)
# Print the resulting character vector of links.
list_farawaySchools

# For a given link:
library(readr)
library(dplyr)

# Read the CSV at this URL into a data frame.
faraway3 <- read_csv("https://stats.moe.gov.tw/files/school/108/faraway3.csv")

# Container that keeps the data frame together with the summaries computed
# from it; `summary` starts out as an empty list.
data_explore <- list(
  dataframe = faraway3,
  summary = list()
)

# 8.1 pipe
# Schematic rewrites: f, x and y are placeholders, not objects defined here.
f(x) # becomes
x %>% f()
f(x, ...) # becomes
x %>% f(...)

# R 4.1.0+ has a built-in pipe: |>
f(x) # becomes
x |> f()

# %___% is a user-defined special function of two arguments, say x and y.
# It can be used in two ways:
`%___%`(x, y)
x `%___%` y
# User-defined infix operator: applies the function on its right-hand side
# (`fn`) to the value on its left-hand side (`x`) and returns the result.
`%--->%` <- function(x, fn) {
  fn(x)
}

# The two calls below compute the same value.
exp(5)
5 %--->% exp

# nested call
# Schematic rewrite of a nested call with the magrittr pipe. The braces stop
# %>% from also inserting x as the first argument of f(), and the dot marks
# where x goes inside the inner call.
f(g(x)) # becomes
x %>% {f(g(.))} # where {} and . are necessary.
# 8.2 Summarise
check class
check NA
# Summarise one column: its mean and its count of missing values. The result
# is stored under data_explore$summary.
faraway3 %>%
  summarise(
    # mean() is called without na.rm, so it yields NA if the column has NAs.
    平均 = mean(原住民學生比率),
    # Anonymous function that is defined and immediately called on the column.
    缺失資料 = (function(x) (sum(is.na(x))))(原住民學生比率)
  ) -> data_explore$summary$原住民學生比率

# anonymous function
immediate function call.
All dplyr functions let you refer to column variables directly in their operations; there is no need to write dataframe$column_variable to access a column's values. They take the same approach as with(data, {...}): any name inside {...} is looked up in data first, before .GlobalEnv:
# Base R version of the same summary: with() evaluates the braced expression
# using the columns of faraway3 as variables, so no `faraway3$` prefix is
# needed.
with(
  faraway3,
  {
    data.frame(
      平均 = mean(原住民學生比率),
      缺失資料 = (function(x) (sum(is.na(x))))(原住民學生比率)
    )
  }
) -> x

# 8.3 Mutate
- fix class
# Convert the column to a factor and store the result back into faraway3.
faraway3 %>%
  mutate(
    縣市名稱 = factor(縣市名稱)
  ) -> faraway3

# The magrittr package includes %<>%, which assigns the result of the
# right-hand side back to the left-hand-side object.
library(magrittr)

# %<>% pipes faraway3 into mutate() and assigns the result back to faraway3,
# replacing the `faraway3 %>% ... -> faraway3` round trip.
faraway3 %<>%
  mutate(
    縣市名稱 = factor(縣市名稱)
  )

# 8.4 Filter
# Keep only the rows whose 縣市名稱 equals "[14]臺東縣", then compute the mean
# of 原住民學生比率 and the number of rows that remain.
faraway3 %>%
  filter(縣市名稱 == "[14]臺東縣") %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  )

# 8.5 Group
# Empty list that will hold the group-wise summaries.
data_explore$summary$by <- list()

# Same two statistics as the filtered summary, computed once per value of
# 縣市名稱; n() counts the rows in each group.
faraway3 %>%
  group_by(縣市名稱) %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  ) %>%
  ungroup() -> data_explore$summary$by$縣市名稱

# Operations that follow group_by() are all computed group-wise until
# ungroup() is called.
8.6 Across
# Apply factor() to every column selected in .cols in a single mutate() call.
# NOTE(review): the target name `xx` is recovered from the fused text
# "xxfaraway3" in the extracted page; confirm against the original chapter.
faraway3 %>%
  mutate(
    across(
      .cols = c(縣市名稱:學生等級, "公/私立":"地區屬性"),
      .fns = factor
    )
  ) -> xx

# Compute the mean of every numeric column; where(is.numeric) selects columns
# by applying the predicate to each of them.
faraway3 %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = mean
    )
  )

# 8.7 rowwise + c_across
# Simulated scores: 50 rows and three homework columns, each value drawn with
# replacement from 1:10. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
grades <- data.frame(
  hw1 = sample(1:10, 50, replace = TRUE),
  hw2 = sample(1:10, 50, replace = TRUE),
  hw3 = sample(1:10, 50, replace = TRUE)
)
# Vectorised row totals: add the three columns element-wise.
grades %>%
  mutate(
    total = hw1 + hw2 + hw3
  )

# The same totals computed row by row: rowwise() makes mutate() operate on one
# row at a time, and c_across() combines that row's selected columns into a
# single vector for sum().
grades %>%
  rowwise() %>%
  mutate(
    total = sum(
      c_across(cols = everything())
    )
  )

# 8.8 tidyr::pivot
data_explore$table <- list()
data_explore$summary$by$縣市名稱 %>% #names()
tidyr::pivot_longer(
cols = 2:3,
names_to = "項目",
values_to = "值"
) ->
data_explore$table$longdata_explore$table$long %>% #names()
tidyr::pivot_wider(
names_from = "縣市名稱",
values_from = "值"
) -> data_explore$table$wide[