-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsparklyr.R
43 lines (29 loc) · 1.04 KB
/
sparklyr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
library(sparklyr)
library(nycflights13)
library(dplyr)
# spark_install(version = "2.1.0")
# https://www.programcreek.com/2018/11/install-spark-in-local-mode-on-ubuntu/
# Open a connection to Spark with master = "local".
sc <- spark_connect(master = "local")

# Copy the `flights` data frame into Spark under the table name "flights".
# overwrite = TRUE keeps the script re-runnable: without it, a second run
# against a connection that already holds the table stops with an
# "already exists" error.
flights_tbl <- copy_to(sc, flights, "flights", overwrite = TRUE)

# List the tables registered on the connection.
src_tbls(sc)

# Compare the dimensions reported for the local data frame and for the
# Spark-backed table.
dim(flights)
dim(flights_tbl)

# Apply the same dplyr verb to the local data frame and to the Spark table.
filter(flights, month == 1)
filter(flights_tbl, month == 1)
# sparklyr does not always behave the way we expect ----------------------
# The same aggregate, length(month), is applied to the local data frame and
# to the Spark table so the two results can be compared.
# NOTE(review): presumably length() is not translated into a per-group row
# count on the Spark backend -- confirm by comparing the two outputs.
flights %>%
  group_by(month) %>%
  summarise(n_flights = length(month))

flights_tbl %>%
  group_by(month) %>%
  summarise(n_flights = length(month))

# dplyr idioms generally work --------------------------------------------
flights_tbl %>%
  group_by(month) %>%
  summarise(n_flights = n())
# Machine learning in Spark -----------------------------------------------
# Copy iris into Spark; overwrite = TRUE keeps the script re-runnable when
# the "iris" table already exists on the connection.
iris_tbl <- copy_to(sc, iris, "iris", overwrite = TRUE)

# Fit k-means with k = 3 on the two petal columns. The Spark copy of iris is
# addressed with underscore names (Petal_Width, Petal_Length).
kmeans_model <- iris_tbl %>%
  select(Petal_Width, Petal_Length) %>%
  ml_kmeans(formula = ~ Petal_Width + Petal_Length, k = 3)

# Score the full table and pull only the two columns needed for the
# comparison into local memory; collect() makes the transfer explicit.
predicted <- ml_predict(kmeans_model, iris_tbl) %>%
  select(Species, prediction) %>%
  collect()

# Cross-tabulate the true species against the assigned cluster.
table(predicted[["Species"]], predicted[["prediction"]])

# Close the Spark connection opened at the top of the script so the session
# is not left running after the script finishes.
spark_disconnect(sc)