第 8 章 Working with data frame
# Build one download URL per folder 104..108. paste() is vectorised, so the
# scalar base URL is recycled against the five folder numbers and the five
# file names (the first three folders use "faraway_new.csv", the last two
# use "faraway1.csv").
paste(
  "https://stats.moe.gov.tw/files/school",
  104:108,
  c(rep("faraway_new.csv", 3), rep("faraway1.csv", 2)),
  sep = "/"
) -> list_farawaySchools

# Print the resulting character vector of links.
list_farawaySchools
For a given link:
library(readr)

# Download and parse the CSV behind one specific link.
faraway3 <- read_csv("https://stats.moe.gov.tw/files/school/108/faraway3.csv")

# Container for the exploration: the raw data frame plus an (initially
# empty) list that later steps fill with summaries.
data_explore <- list(
  dataframe = faraway3,
  summary = list()
)
library(dplyr)
8.1 pipe
# Schematic only: f and x are placeholders, not defined objects.
f(x) # becomes
x %>% f()

# Extra arguments stay inside the call; x is piped in as the first argument.
f(x, ...) # becomes
x %>% f(...)
R 4.1.0+ has a built-in pipe, |>:
# Schematic only: f and x are placeholders, not defined objects.
f(x) # becomes
x |> f()
%___%
is a user-defined special (infix) function of two arguments, say x and y. It can be called in two ways:
`%___%`(x, y)
x %___% y
# A minimal home-made pipe: an infix operator that applies the function on
# its right-hand side to the value on its left-hand side.
#   x  : any value
#   fn : a function of one argument
# Returns fn(x).
`%--->%` <- function(x, fn) {
  fn(x)
}

# The two calls below are equivalent.
exp(5)
5 %--->% exp
nested call
# Schematic only: f, g and x are placeholders, not defined objects.
f(g(x)) # becomes
x %>% {f(g(.))} # where {} and . are necessary.
8.2 Summarise
check class
check NA
# Summarise one column into a one-row data frame and store it in
# data_explore$summary:
#   平均     - mean of the column (NA if the column contains any NA, since
#              na.rm is not set)
#   缺失資料 - number of NA values, computed by an anonymous function that is
#              defined and called immediately
faraway3 %>%
  summarise(
    平均 = mean(原住民學生比率),
    缺失資料 = (function(x)(sum(is.na(x))))(原住民學生比率)
  ) -> data_explore$summary$原住民學生比率
anonymous function
immediate function call.
All dplyr functions let you refer to column variables directly in an operation; there is no need to write dataframe$column_variable
to access a variable's value, since they apply the same with(data, {...})
approach, so that anything inside {...}
will be searched for within data
before .GlobalEnv
:
# Base-R counterpart of the summarise() call: with() evaluates the braced
# expression using the columns of faraway3 as variables, so the column can be
# named directly. The one-row data frame it builds is stored in x.
with(
  faraway3,
  {
    data.frame(
      平均 = mean(原住民學生比率),
      缺失資料 = (function(x)(sum(is.na(x))))(原住民學生比率)
    )
  }
) -> x
8.3 Mutate
- fix class
# Convert the 縣市名稱 column to a factor and overwrite faraway3 with the result.
faraway3 %>%
  mutate(
    縣市名稱 = factor(縣市名稱)
  ) -> faraway3
Package magrittr includes the %<>% operator,
which pipes the LHS object into the RHS and assigns the result back to the LHS object.
library(magrittr)

# Same conversion as `faraway3 %>% mutate(...) -> faraway3`, written with the
# assignment pipe: the result replaces faraway3.
faraway3 %<>%
  mutate(
    縣市名稱 = factor(縣市名稱)
  )
8.4 Filter
# Keep only the rows whose 縣市名稱 equals "[14]臺東縣", then report the mean of
# 原住民學生比率 and the number of remaining rows (n()).
faraway3 %>%
  filter(縣市名稱 == "[14]臺東縣") %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  )
8.5 Group
# Create an empty slot for grouped summaries.
data_explore$summary$by <- list()

# Per-縣市名稱 summary: mean of 原住民學生比率 and row count for each group.
# ungroup() drops any remaining grouping before the result is stored.
faraway3 %>%
  group_by(縣市名稱) %>%
  summarise(
    平均原住民學生比率 = mean(原住民學生比率),
    偏遠學校數目 = n()
  ) %>%
  ungroup() -> data_explore$summary$by$縣市名稱
Operations following group_by
will all be computed group-wise until ungroup
is called.
8.6 Across
# Apply factor() to several columns at once: the range 縣市名稱..學生等級 and the
# range "公/私立".."地區屬性" (quoted because the first name contains "/").
# The converted data frame is stored in xx; faraway3 itself is not modified.
faraway3 %>%
  mutate(
    across(
      .cols = c(縣市名稱:學生等級, "公/私立":"地區屬性"),
      .fns = factor
    )
  ) -> xx
# Compute the mean of every numeric column in one call.
faraway3 %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = mean
    )
  )
8.7 rowwise + c_across
# Toy data: 50 rows of three homework scores, each drawn at random (with
# replacement) from 1..10. TRUE is spelled out because T can be reassigned.
grades <- data.frame(
  hw1 = sample(1:10, 50, replace = TRUE),
  hw2 = sample(1:10, 50, replace = TRUE),
  hw3 = sample(1:10, 50, replace = TRUE)
)
# Row totals via vectorised addition of the three columns.
grades %>%
  mutate(
    total = hw1 + hw2 + hw3
  )
# Row totals without naming the columns: rowwise() makes mutate() work one row
# at a time, and c_across() gathers the selected columns of that row into a
# vector for sum().
grades %>%
  rowwise() %>%
  mutate(
    total = sum(
      c_across(cols = everything())
    )
  )
8.8 tidyr::pivot
# Create an empty slot for reshaped tables.
data_explore$table <- list()

# Reshape the per-縣市名稱 summary to long form: columns 2 and 3 are stacked,
# with their names going to "項目" and their values to "值".
data_explore$summary$by$縣市名稱 %>% #names()
  tidyr::pivot_longer(
    cols = 2:3,
    names_to = "項目",
    values_to = "值"
  ) -> data_explore$table$long
# Reshape the long table to wide form: each distinct 縣市名稱 becomes a column
# filled from "值".
data_explore$table$long %>% #names()
  tidyr::pivot_wider(
    names_from = "縣市名稱",
    values_from = "值"
  ) -> data_explore$table$wide
[