-
Notifications
You must be signed in to change notification settings - Fork 0
/
tagWidget.py
121 lines (105 loc) · 5.9 KB
/
tagWidget.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# This is a script that allows you to edit csv files within an output cell of a jupyter notebook, and save the changes to a new csv file.
# Example drawn from: https://medium.com/@williams.evanpaul/edit-pandas-dataframes-within-jupyterlab-36f129129496
# TO USE THIS SCRIPT IN A NOTEBOOK:
# 1) import (from cwd) the tagWidget.py functions and qgrid:
#----> from tagWidget import export_edits, clear_edits, get_current_row, get_edits
#----> import qgrid
# 2) with pandas, create a dataframe of e.g. tweets you would like to tag:
#----> import pandas as pd
#----> df = pd.read_csv('manual_annotation_file.csv')
# 3) convert df into a qgrid widget:
#----> qgrid_df = qgrid.show_grid(df)
# 4) hook the event handlers up so that selections and edits are tracked within the notebook output cell
#    NOTE: this file already makes these same two calls when it is imported (see the end of the file), so repeating them here is likely redundant.
#----> qgrid.on(['selection_changed'], get_current_row)
#----> qgrid.on(['cell_edited'], get_edits)
# 5) call qgrid_df and click on any cell to edit it. Once you have edited a cell and clicked away, ctrl+z will not undo the change, so be careful when clicking on cells not to delete the text you intend to tag
#----> qgrid_df
# 6) create a new df containing the changes made to the dataframe:
#----> q = qgrid_df.get_changed_df()
# 7) save the whole 'q' table output to a new file:
#----> q.to_csv(...)
import pandas as pd
import qgrid
import ipywidgets as widgets
# Display options for qgrid grids: row/column sizing and column behaviour.
qgrid.set_grid_option('rowHeight', 120)
qgrid.set_grid_option('defaultColumnWidth', 400)
qgrid.set_grid_option('forceFitColumns', True)
qgrid.set_grid_option('enableColumnReorder', True)
# Display options for pandas: show up to 25 columns and up to 1000
# characters per cell before truncating.
pd.set_option('display.max_columns', 25)
pd.set_option('display.max_colwidth', 1000)
"""--------------------------------------
1. display output for current row
-----------------------------------------"""
# Output widget that shows the currently selected row.
# `overflow='auto'` already applies to both axes, so the deprecated
# `overflow_x` / `overflow_y` layout traits are no longer passed.
current_row = widgets.Output(
    layout=widgets.Layout(
        border='1px solid black',
        height='99%',
        width='99%',
        overflow='auto',
    )
)
# To use it, display `current_row` as the output of a notebook cell.
# Right-click it and select "Create New View for Output", then drag it to the right side of the screen
# Finally, hide the cell's output by clicking the blue bar to the left of the output
"""--------------------------------------
2. display the output widget for edited rows, and create the DataFrame to hold the edits
-----------------------------------------"""
# Columns of the edit log, and the (initially empty) log itself.
edit_cols = ['data', 'index', 'column', 'old', 'new']
edits = pd.DataFrame(columns=edit_cols)

# Components of the edited-rows widget: an output area that lists the
# edits, a text box for the export file name, and the two action buttons.
edited_cells = widgets.Output(
    layout=widgets.Layout(border='1px solid black', overflow_y='auto'),
)
edits_file_name = widgets.Text(placeholder='File name')
export_button = widgets.Button(
    description='Export to CSV',
    tooltip='Export your edits to CSV',
)
clear_button = widgets.Button(
    description='Clear edits',
    tooltip='Clear the output widget and all stored edits',
)
# Function to use when the 'Export to CSV' button is clicked
def export_edits(sender):
    """Write the recorded edits to a CSV file named after the text box.

    Used as the click handler of the 'Export to CSV' button.

    The file name is read from the ``edits_file_name`` text widget.
    Surrounding whitespace is ignored, an empty name falls back to
    ``edits`` (instead of producing a hidden file called ``.csv``), and a
    name that already ends in ``.csv`` is not given a second extension.

    Args:
        sender: The object passed by the click event; not used.
    """
    name = edits_file_name.value.strip() or 'edits'
    if not name.lower().endswith('.csv'):
        name = f'{name}.csv'
    edits.to_csv(name)
# Function to use when the 'Clear edits' button is clicked
def clear_edits(sender):
    """Empty the edited-cells output widget and discard all stored edits.

    Used as the click handler of the 'Clear edits' button.

    Args:
        sender: The object passed by the click event; not used.
    """
    global edits
    edited_cells.clear_output()
    # Rebind the module-level log to a fresh, empty frame with the same columns.
    edits = pd.DataFrame(columns=edit_cols)
# Connect each button to its click handler.
export_button.on_click(export_edits)
clear_button.on_click(clear_edits)
# Lay out the panel: file-name box and buttons on one row, the list of edits below.
export_features = widgets.HBox([edits_file_name, export_button, clear_button])
# Keep a reference to the assembled panel. As a bare expression it would be
# built and then discarded when this file is imported as a module, leaving
# the notebook with nothing to display.
edits_panel = widgets.VBox([export_features, edited_cells])
# To use it, display `edits_panel` as the output of a notebook cell.
# Right-click it and select "Create New View for Output", then drag it to the right side of the screen
# Finally, hide the cell's output by clicking the blue bar to the left of the output
# This cell creates two Qgrid event handlers and links them with the appropriate output widgets
# Display the currently selected row from any Qgrid widget in the current_row output widget
# This will be activated when Qgrid detects that a new row is selected
def get_current_row(event, qgrid_widget):
    """Show the widget's current selection, transposed, in ``current_row``.

    Registered as the handler for qgrid's 'selection_changed' event.

    Args:
        event: The event payload supplied by qgrid; not used.
        qgrid_widget: The qgrid widget whose selection changed.
    """
    # NOTE(review): `display` is not imported in this file; presumably it
    # relies on IPython making it available -- confirm.
    with current_row:
        selection = qgrid_widget.get_selected_df()
        display(selection.T)
        # wait=True defers the clear until new output arrives, so what was
        # just displayed stays visible until it is replaced.
        current_row.clear_output(wait=True)
# Display the edits in the edited_cells output widget and store them in the edits DataFrame
# This will be activated when Qgrid detects that a cell is edited
def _find_dataframe_name(df, default='<unknown>'):
    """Return the first variable name bound to ``df``, or ``default``.

    This module's globals are searched first, then the namespace of the
    ``__main__`` module. NOTE(review): in a Jupyter kernel ``__main__`` is
    presumably the notebook's own namespace -- confirm.
    """
    import sys  # local import so the file-level import block is left untouched

    namespaces = [globals()]
    main_module = sys.modules.get('__main__')
    if main_module is not None:
        namespaces.append(vars(main_module))
    for namespace in namespaces:
        # Snapshot the items so the scan is unaffected by names added meanwhile.
        for name, value in list(namespace.items()):
            if value is df:
                return name
    return default


def get_edits(event, qgrid_widget):
    """Print a cell edit into ``edited_cells`` and append it to ``edits``.

    Registered as the handler for qgrid's 'cell_edited' event.

    The name of the edited DataFrame is stored in ``event['data']``. It is
    looked up in this module's globals and in ``__main__``, and falls back
    to ``'<unknown>'`` when no variable is bound to the DataFrame, instead
    of raising ``IndexError``.

    Args:
        event: Event payload; its 'index', 'column', 'old' and 'new' entries
            are read, and a 'data' entry is added to it.
        qgrid_widget: The qgrid widget whose cell was edited.
    """
    global edits
    with edited_cells:
        event['data'] = _find_dataframe_name(qgrid_widget.df)
        event_index = event['index']
        event_column = event['column']
        event_old = event['old']
        event_new = event['new']
        event_data = event['data']
        print(f'{event_data}[{event_index}, {event_column}] \t old: {event_old} \t new: {event_new}')
        relevant = ['index', 'column', 'data', 'old', 'new']
        row = {key: event[key] for key in relevant}
        # DataFrame.append was removed in pandas 2.0, so use concat instead.
        # dtype=object keeps the one-row frame's column dtypes in line with
        # the empty log it is first concatenated onto.
        new_row = pd.DataFrame([row], dtype=object)
        edits = pd.concat([edits, new_row], ignore_index=True)
# Register each handler defined above for its Qgrid event.
for _event_name, _handler in (
    ('selection_changed', get_current_row),
    ('cell_edited', get_edits),
):
    qgrid.on([_event_name], _handler)
# Drop the loop variables so no extra names are left in the module namespace.
del _event_name, _handler