-
Notifications
You must be signed in to change notification settings - Fork 2
/
data-exploration.dib
125 lines (82 loc) · 2.05 KB
/
data-exploration.dib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!fsharp
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"
#!fsharp
#r "nuget:Microsoft.Spark"
#!fsharp
open Microsoft.Spark.Sql;
#!fsharp
// Spark session shared by every later cell in this notebook.
// GetOrCreate is called on a builder configured with the application
// name "on-dotnet-fsharp".
let sparkSession =
    let builder = SparkSession.Builder().AppName("on-dotnet-fsharp")
    builder.GetOrCreate()
#!fsharp
// Load the restaurant-inspection CSV into a DataFrame.
// Both reader options are passed as the string "true":
//   header      - first line of the file supplies the column names
//   inferSchema - column types are inferred from the data
let df =
    let reader =
        sparkSession.Read()
            .Option("header", "true")
            .Option("inferSchema", "true")
    reader.Csv("data/nyc-restaurant-inspections.csv")
#!fsharp
// Print the schema of the loaded DataFrame so the column names and the
// types produced by the "inferSchema" option can be checked.
df.PrintSchema()
#!fsharp
// Row count per distinct value of the "BORO" column.
// The result has the grouping column plus a "count" column, which a
// later cell selects by that name.
let boroughs =
    let byBorough = df.GroupBy(Functions.Col("BORO"))
    byBorough.Count()
#!fsharp
// Display the per-BORO counts computed in the previous cell.
boroughs.Show()
#!fsharp
// Per-BORO counts with the rows whose BORO value equals "0" removed.
// NOTE(review): presumably "0" is a placeholder for records without an
// assigned borough - confirm against the dataset documentation.
let cleanBoroughs =
    let boroIsNotZero = Functions.Col("BORO").NotEqual("0")
    boroughs.Filter(boroIsNotZero)
#!fsharp
// Display the counts again, now without the BORO = "0" row.
cleanBoroughs.Show()
#!fsharp
#r "nuget: Plotly.NET, 2.0.0-preview.5"
#r "nuget: Plotly.NET.Interactive, 2.0.0-preview.5"
#!fsharp
open Plotly.NET
#!fsharp
// Pull the cleaned per-BORO counts to the driver, largest count first,
// and split them into two parallel sequences: names and counts.
//
// Note: this rebinds `boroughs`. Before this cell it names the grouped
// DataFrame; after it, it names the sequence of BORO strings.
//
// Each cell value is converted through `string` and, for the count,
// parsed back with `int`.
// NOTE(review): Seq.unzip is not an FSharp.Core function as far as I can
// tell; presumably it is supplied by the Plotly.NET package opened in the
// preceding cell - confirm.
let boroughs, counts =
    let rankedRows =
        cleanBoroughs
            .Select("BORO", "count")
            .OrderBy(Functions.Col("count").Desc())
            .Collect()
    rankedRows
    |> Seq.map (fun r -> string r.[0], int (string r.[1]))
    |> Seq.unzip
#!fsharp
// Column chart built from the two parallel sequences produced above.
// NOTE(review): presumably `boroughs` supplies the category labels and
// `counts` the bar heights - confirm the argument order against the
// pinned Plotly.NET version.
let boroughColumn = Chart.Column(boroughs,counts)
#!fsharp
// Evaluate the chart as the cell's final expression.
// NOTE(review): presumably the Plotly.NET.Interactive package referenced
// earlier renders it inline - confirm.
boroughColumn
#!fsharp
// One row per CAMIS value, keeping only the columns needed for mapping:
// the CAMIS identifier and its Latitude / Longitude.
// Rows that repeat a CAMIS value are dropped by DropDuplicates.
let coordinates =
    let located = df.Select("CAMIS", "Latitude", "Longitude")
    located.DropDuplicates("CAMIS")
#!fsharp
// Display the de-duplicated CAMIS / Latitude / Longitude rows.
coordinates.Show()
#!fsharp
// Rows of `coordinates` whose Latitude AND Longitude are both non-zero.
//
// Both comparisons must hold. The previous condition joined them with OR,
// which kept a row when only one coordinate was zero, and - under SQL
// three-valued logic - also kept a row when one coordinate was NULL and the
// other non-zero (NULL OR TRUE is TRUE). Such a row then fails in the next
// cell, where every coordinate is parsed with `float`. With AND, a zero or
// NULL in either column removes the row.
let nonZeroCoordinates =
    coordinates.Where("Latitude != 0.0 AND Longitude != 0.0")
#!fsharp
// Collect the filtered coordinates to the driver and split them into three
// parallel sequences: CAMIS labels, latitudes and longitudes.
//
// Each cell value is converted through `string`; the two coordinates are
// then parsed with `float`.
// NOTE(review): Seq.unzip3 is not an FSharp.Core function as far as I can
// tell; presumably it is supplied by the Plotly.NET package opened
// earlier - confirm.
let labels, lat, lon =
    let toPoint (row: Row) =
        string row.[0], float (string row.[1]), float (string row.[2])
    nonZeroCoordinates
        .Select("CAMIS", "Latitude", "Longitude")
        .Collect()
    |> Seq.map toPoint
    |> Seq.unzip3
#!fsharp
// Point map of the collected coordinates, labelled with CAMIS values and
// drawn on an OpenStreetMap-styled Mapbox layer.
let pointMapbox =
    // Point trace: longitudes are passed first, then latitudes; labels are
    // positioned top-centre.
    let points =
        Chart.PointMapbox(
            lon, lat,
            Labels = labels,
            TextPosition = StyleParam.TextPosition.TopCenter
        )
    // Map layer settings.
    // NOTE(review): Center is presumably (longitude, latitude), matching the
    // argument order of the trace above - confirm against Plotly.NET.
    let mapLayer =
        Mapbox.init(
            Style = StyleParam.MapboxStyle.OpenStreetMap,
            Center = (-73.99, 40.73),
            Zoom = 8.
        )
    points |> Chart.withMapbox(mapLayer)
#!fsharp
// Evaluate the map chart as the cell's final expression.
// NOTE(review): presumably the Plotly.NET.Interactive package referenced
// earlier renders it inline - confirm.
pointMapbox
#!fsharp
let prepData =
df
.Select("CAMIS","INSPECTION DATE", "VIOLATION CODE", "SCORE")