Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spreadsheet autotype function #64

Merged
merged 1 commit into from
Dec 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 81 additions & 5 deletions evaldo/builtins_spreadsheet.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,14 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
return MakeBuiltinError(ps, "Unable to parse file as CSV.", "load\\csv")
}
spr := env.NewSpreadsheet(rows[0])
for _, row := range rows {
anyRow := make([]any, len(row))
for i, v := range row {
anyRow[i] = v
if len(rows) > 1 {
for _, row := range rows[1:] {
anyRow := make([]any, len(row))
for i, v := range row {
anyRow[i] = *env.NewString(v)
}
spr.AddRow(*env.NewSpreadsheetRow(anyRow, spr))
}
spr.AddRow(*env.NewSpreadsheetRow(anyRow, spr))
}
return *spr
default:
Expand Down Expand Up @@ -337,6 +339,23 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
}
},
},
"autotype": {
Argsn: 2,
Doc: "",
Fn: func(ps *env.ProgramState, arg0 env.Object, arg1 env.Object, arg2 env.Object, arg3 env.Object, arg4 env.Object) (res env.Object) {
switch spr := arg0.(type) {
case env.Spreadsheet:
switch percent := arg1.(type) {
case env.Decimal:
return AutoType(ps, &spr, percent.Value)
default:
return MakeArgError(ps, 2, []env.Type{env.DecimalType}, "autotype")
}
default:
return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "autotype")
}
},
},
}

func GenerateColumn(ps *env.ProgramState, s env.Spreadsheet, name env.Word, extractCols env.Block, code env.Block) env.Object {
Expand Down Expand Up @@ -469,3 +488,60 @@ func Limit(ps *env.ProgramState, s env.Spreadsheet, n int) env.Object {
nspr.Rows = s.Rows[0:n]
return *nspr
}

// AutoType returns a new spreadsheet with the same columns as s in which the
// string cells of each column are converted to integers or decimals when
// enough of that column's cells parse as numbers.
//
// percent is the fraction of rows that must parse for a column to be
// converted; the threshold is int(len(s.Rows) * percent). For each column:
//   - if at least one cell parses only as a float and the number of int-like
//     plus float-like cells reaches the threshold, the column becomes decimal;
//   - otherwise, if the number of int-like cells reaches the threshold, the
//     column becomes integer;
//   - otherwise the column's cells are copied unchanged.
//
// Cells that are not env.String, and string cells that do not parse as the
// type chosen for their column, are copied unchanged rather than being
// replaced by a zero value. The input spreadsheet is not modified. ps is
// currently unused.
func AutoType(ps *env.ProgramState, s *env.Spreadsheet, percent float64) env.Object {
	numCols := len(s.Cols)

	// Per-column tallies of string cells that parse as an integer, and of
	// those that do not parse as an integer but do parse as a float.
	intCount := make([]int, numCols)
	decCount := make([]int, numCols)
	for _, row := range s.Rows {
		for i, val := range row.Values {
			if i >= numCols {
				// Ignore cells beyond the declared columns.
				break
			}
			cell, ok := val.(env.String)
			if !ok {
				continue
			}
			if _, err := strconv.ParseInt(cell.Value, 10, 64); err == nil {
				intCount[i]++
			} else if _, err = strconv.ParseFloat(cell.Value, 64); err == nil {
				decCount[i]++
			}
		}
	}

	// Pre-create one output row per input row so cells can be filled in
	// column by column below.
	newS := env.NewSpreadsheet(s.Cols)
	for range s.Rows {
		newS.AddRow(*env.NewSpreadsheetRow(make([]any, numCols), newS))
	}

	// The threshold depends only on the row count, so compute it once.
	minRows := int(float64(len(s.Rows)) * percent)
	for colNum := 0; colNum < numCols; colNum++ {
		newType := "str"
		switch {
		// If there's a mix of floats and ints, make it a float.
		case decCount[colNum] > 0 && decCount[colNum]+intCount[colNum] >= minRows:
			newType = "dec"
		case intCount[colNum] >= minRows:
			newType = "int"
		}
		for i, row := range s.Rows {
			if colNum >= len(row.Values) {
				// Short row: there is no source cell to copy or convert.
				continue
			}
			newS.Rows[i].Values[colNum] = autoTypeCell(row.Values[colNum], newType)
		}
	}

	return *newS
}

// autoTypeCell converts a single cell to newType ("int" or "dec") when it is
// an env.String that parses as that type; in every other case it returns val
// unchanged.
func autoTypeCell(val any, newType string) any {
	cell, ok := val.(env.String)
	if !ok {
		return val
	}
	switch newType {
	case "int":
		if n, err := strconv.ParseInt(cell.Value, 10, 64); err == nil {
			return *env.NewInteger(n)
		}
	case "dec":
		if f, err := strconv.ParseFloat(cell.Value, 64); err == nil {
			return *env.NewDecimal(f)
		}
	}
	return val
}
22 changes: 11 additions & 11 deletions examples/spreadsheet/data.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
ID,Name,Department
1,John Doe,Marketing
2,Jane Smith,Engineering
3,Robert Johnson,Human Resources
4,Julia Davis,Sales
5,James Brown,Engineering
6,Mary Williams,Marketing
7,Michael Miller,Human Resources
8,Linda Wilson,Sales
9,William Moore,Engineering
10,Elizabeth Taylor,Marketing
ID,Name,Department,Years at Company
1,John Doe,Marketing,2
2,Jane Smith,Engineering,5
3,Robert Johnson,Human Resources,1
4,Julia Davis,Sales,3
5,James Brown,Engineering,2
6,Mary Williams,Marketing,4
7,Michael Miller,Human Resources,3
8,Linda Wilson,Sales,1
9,William Moore,Engineering,2
10,Elizabeth Taylor,Marketing,3