From 6c8c9a9c81abad7e55d2a7f75d776cd6684fe8b9 Mon Sep 17 00:00:00 2001
From: rokostik
Date: Fri, 15 Dec 2023 22:29:12 +0100
Subject: [PATCH] add spreadsheet autotype function

---
Review notes: AutoType now keeps cells it cannot convert instead of
panicking on non-String values or silently coercing them to zero, and
tolerates rows whose length differs from the column count. Indentation of
context lines was reconstructed assuming gofmt output; the blob hashes on
the index lines were not regenerated.

 evaldo/builtins_spreadsheet.go | 108 ++++++++++++++++++++++++++++++++--
 examples/spreadsheet/data.csv  |  22 ++++-----
 2 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/evaldo/builtins_spreadsheet.go b/evaldo/builtins_spreadsheet.go
index 347fa8f1..78919eca 100644
--- a/evaldo/builtins_spreadsheet.go
+++ b/evaldo/builtins_spreadsheet.go
@@ -118,12 +118,14 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
 					return MakeBuiltinError(ps, "Unable to parse file as CSV.", "load\\csv")
 				}
 				spr := env.NewSpreadsheet(rows[0])
-				for _, row := range rows {
-					anyRow := make([]any, len(row))
-					for i, v := range row {
-						anyRow[i] = v
+				if len(rows) > 1 {
+					for _, row := range rows[1:] {
+						anyRow := make([]any, len(row))
+						for i, v := range row {
+							anyRow[i] = *env.NewString(v)
+						}
+						spr.AddRow(*env.NewSpreadsheetRow(anyRow, spr))
 					}
-					spr.AddRow(*env.NewSpreadsheetRow(anyRow, spr))
 				}
 				return *spr
 			default:
@@ -337,6 +339,23 @@ var Builtins_spreadsheet = map[string]*env.Builtin{
 			}
 		},
 	},
+	"autotype": {
+		Argsn: 2,
+		Doc:   "Takes a spreadsheet and a decimal fraction (0-1) of rows that must parse as a number, and returns a new spreadsheet with numeric-looking string columns converted to integers or decimals.",
+		Fn: func(ps *env.ProgramState, arg0 env.Object, arg1 env.Object, arg2 env.Object, arg3 env.Object, arg4 env.Object) (res env.Object) {
+			switch spr := arg0.(type) {
+			case env.Spreadsheet:
+				switch percent := arg1.(type) {
+				case env.Decimal:
+					return AutoType(ps, &spr, percent.Value)
+				default:
+					return MakeArgError(ps, 2, []env.Type{env.DecimalType}, "autotype")
+				}
+			default:
+				return MakeArgError(ps, 1, []env.Type{env.SpreadsheetType}, "autotype")
+			}
+		},
+	},
 }
 
 func GenerateColumn(ps *env.ProgramState, s env.Spreadsheet, name env.Word, extractCols env.Block, code env.Block) env.Object {
@@ -469,3 +488,82 @@ func Limit(ps *env.ProgramState, s env.Spreadsheet, n int) env.Object {
 	nspr.Rows = s.Rows[0:n]
 	return *nspr
 }
+
+// AutoType returns a copy of s in which numeric-looking String columns are
+// converted to Integer or Decimal columns.
+//
+// percent is the fraction (0 to 1) of rows in a column that must parse as a
+// number for that column to be converted. A column holding at least one
+// decimal becomes Decimal when integers and decimals together reach the
+// threshold; otherwise it becomes Integer when integers alone reach it. Cells
+// that are missing, are not Strings, or do not parse as the chosen type are
+// copied unchanged instead of being coerced to zero.
+func AutoType(ps *env.ProgramState, s *env.Spreadsheet, percent float64) env.Object {
+	numCols := len(s.Cols)
+	intCount := make([]int, numCols)
+	decCount := make([]int, numCols)
+	for _, row := range s.Rows {
+		for i, val := range row.Values {
+			if i >= numCols {
+				// Cells beyond the declared columns have no column to type.
+				break
+			}
+			str, ok := val.(env.String)
+			if !ok {
+				continue
+			}
+			if _, err := strconv.Atoi(str.Value); err == nil {
+				intCount[i]++
+			} else if _, err := strconv.ParseFloat(str.Value, 64); err == nil {
+				decCount[i]++
+			}
+		}
+	}
+
+	minRows := int(float64(len(s.Rows)) * percent)
+	colType := make([]string, numCols)
+	for i := range colType {
+		switch {
+		case decCount[i] > 0 && decCount[i]+intCount[i] >= minRows:
+			// if there's a mix of floats and ints, make it a float
+			colType[i] = "dec"
+		case intCount[i] >= minRows:
+			colType[i] = "int"
+		default:
+			colType[i] = "str"
+		}
+	}
+
+	newS := env.NewSpreadsheet(s.Cols)
+	for _, row := range s.Rows {
+		newRow := make([]any, numCols)
+		for i := range newRow {
+			if i < len(row.Values) {
+				newRow[i] = autoTypeValue(row.Values[i], colType[i])
+			}
+		}
+		newS.AddRow(*env.NewSpreadsheetRow(newRow, newS))
+	}
+	return *newS
+}
+
+// autoTypeValue converts val to the type named by colType ("int" or "dec").
+// It returns val unchanged when val is not a String, when it does not parse
+// as that type, or when colType is anything else.
+func autoTypeValue(val any, colType string) any {
+	str, ok := val.(env.String)
+	if !ok {
+		return val
+	}
+	switch colType {
+	case "int":
+		if n, err := strconv.Atoi(str.Value); err == nil {
+			return *env.NewInteger(int64(n))
+		}
+	case "dec":
+		if f, err := strconv.ParseFloat(str.Value, 64); err == nil {
+			return *env.NewDecimal(f)
+		}
+	}
+	return val
+}
diff --git a/examples/spreadsheet/data.csv b/examples/spreadsheet/data.csv
index 03eef445..8963a230 100644
--- a/examples/spreadsheet/data.csv
+++ b/examples/spreadsheet/data.csv
@@ -1,11 +1,11 @@
-ID,Name,Department
-1,John Doe,Marketing
-2,Jane Smith,Engineering
-3,Robert Johnson,Human Resources
-4,Julia Davis,Sales
-5,James Brown,Engineering
-6,Mary Williams,Marketing
-7,Michael Miller,Human Resources
-8,Linda Wilson,Sales
-9,William Moore,Engineering
-10,Elizabeth Taylor,Marketing
\ No newline at end of file
+ID,Name,Department,Years at Company
+1,John Doe,Marketing,2
+2,Jane Smith,Engineering,5
+3,Robert Johnson,Human Resources,1
+4,Julia Davis,Sales,3
+5,James Brown,Engineering,2
+6,Mary Williams,Marketing,4
+7,Michael Miller,Human Resources,3
+8,Linda Wilson,Sales,1
+9,William Moore,Engineering,2
+10,Elizabeth Taylor,Marketing,3
\ No newline at end of file