pandas_GUI.new_pandas_column_GUI

  1def new_pandas_column_GUI(df_info=None, show_text_col = False, **kwargs):
  2    """
  3    If passed no parameters this will look for all the dataframes in the user
  4    namespace and make them available for adding a new column to. Once a
  5    dataframe is chosen only the numerical columns from that dataframe will
  6    be available for inclusion in the new column expression.
  7
  8    If you wish to allow only certain dataframes or have them show up as
  9    user friendly names in the menus provide that information in the first
 10    paramater df_info.
 11
 12    To allow inclusion of text columns pass True for show_text_col.
 13
 14    :param bool show_text_col: (default = False). When True columns
 15    containing text will be shown.
 16
 17    :param list df_info: List of Lists [[object,globalname,
 18    userfriendly]],..]
 19      * object -- pandas.DataFrame
 20      * globalname -- string name of the object in the user global name space.
 21      * userfriendly -- string name to display for user selection.
 22      
 23    :keyword bool findframes: default = True. If set to false and dataframes
 24    are passed in dfs_info, will not search for dataframes in the user
 25    namespace.
 26    """
 27
 28    from ipywidgets import Layout, Box, HBox, VBox, GridBox, Tab, \
 29        Dropdown, Label, Text, Textarea, Button, Checkbox, Output
 30    from ipywidgets import HTML as richLabel
 31    from IPython.display import display, HTML
 32    from IPython import get_ipython
 33    from JPSLUtils.utils import new_cell_immediately_below,\
 34        select_cell_immediately_below, move_cursor_in_current_cell, \
 35        insert_text_into_next_cell, insert_text_at_beginning_of_current_cell, \
 36        insert_newline_at_end_of_current_cell, select_containing_cell, \
 37        delete_selected_cell, replace_text_of_next_cell
 38
 39    from .utils import find_pandas_dataframe_names, build_run_snip_widget
 40    from IPython import get_ipython
 41    global_dict = get_ipython().user_ns
 42    JPSLUtils = global_dict["JPSLUtils"]
 43    dfs_info = []
 44    if isinstance(df_info,list):
 45        for k in df_info:
 46            dfs_info.append(k)
 47    findframes = kwargs.pop('findframes',True)
 48    if findframes:
 49        for k in find_pandas_dataframe_names():
 50            dfs_info.append([global_dict[k],k,k])
 51    friendly_to_globalname = {k[2]:k[1] for k in dfs_info}
 52    friendly_to_object = {k[2]:k[0] for k in dfs_info}
 53
 54    #### Define GUI Elements ####
 55
 56    importstr = '# CODE BLOCK generated using new_pandas_column_GUI().\n' \
 57                '# See https://jupyterphysscilab.github.io/' \
 58                'jupyter_Pandas_GUI.\n' \
 59                '# Imports (no effect if already imported)\n' \
 60                'import numpy as np\n'
 61    allbutlastline = importstr
 62    lastline = ''
 63
 64    def split_to_all_but_last_and_last(text):
 65        all_but_last = ''
 66        last = ''
 67        lines = text.split('\n')
 68        for k in range(len(lines)):
 69            if k < len(lines) - 1:
 70                all_but_last += lines[k] + '\n'
 71            else:
 72                last = lines[k]
 73        return all_but_last, last
 74            
 75    # DataFrame Choice (Step 1)
 76    step1instr = Label(value = 'Select the DataFrame to work with.')
 77    tempopts = []
 78    tempopts.append('Choose')
 79    for k in dfs_info:
 80        tempopts.append(k[2])
 81    whichframe = Dropdown(options=tempopts,
 82                                description='DataFrame: ',)
 83
 84    def update_columns(change):
 85        df = friendly_to_object[change['new']]
 86        tempcols = df.columns.values
 87        tempopt = ['Choose column to insert.']
 88        for k in tempcols:
 89            if show_text_col:
 90                tempopt.append(k)
 91            else:
 92                if df[k].dtype != 'O':
 93                    tempopt.append(k)
 94        whichcolumn.options = tempopt
 95        pass
 96    whichframe.observe(update_columns, names='value')
 97    step1 = VBox(children=[step1instr, whichframe])
 98
 99    # Step 2
100    newname = Text(placeholder='Type name for new column.')
101    step2instr = richLabel(
102        value='Pick a name for the new column. The expression will be ' \
103              'built in the cell (textbox) below. Click the "Insert" button ' \
104              'when you are satisfied with the name.')
105    insertname = Button(description="Insert")
106
107    def do_insertname(change):
108        framename = friendly_to_globalname[whichframe.value]
109        codestr = framename + '[\'' + newname.value + '\'] = '
110        if JPSLUtils.notebookenv == 'NBClassic':
111            select_containing_cell('newcolGUI')
112            select_cell_immediately_below()
113            insert_newline_at_end_of_current_cell(codestr)
114        else:
115            allbutlastline, lastline = split_to_all_but_last_and_last(
116                codearea.sniptext.value)
117            if lastline == '' or lastline == '\n':
118                codearea.sniptext.value = allbutlastline + '\n' + codestr
119            else:
120                if lastline.endswith('\n'):
121                    codearea.sniptext.value = allbutlastline + lastline + codestr
122                else:
123                    codearea.sniptext.value = allbutlastline + lastline + '\n' + \
124                                   codestr
125        pass
126
127    insertname.on_click(do_insertname)
128
129    step2 = VBox(children=[step2instr, HBox(children=[newname,
130                                           insertname])])
131
132    # Step 3
133    whichcolumn = Dropdown(options=['Choose column to insert.'],
134                           description='Column: ',
135                           )
136
137    def column_insert(change):
138        col = change['new']
139        if col == 'Choose column to insert.':
140            return
141        framename = friendly_to_globalname[whichframe.value]
142        text = framename + '[\'' + col + '\']'
143        if JPSLUtils.notebookenv == 'NBClassic':
144            select_containing_cell('newcolGUI')
145            insert_text_into_next_cell(text)
146        else:
147            allbutlastline, lastline = split_to_all_but_last_and_last(
148                codearea.sniptext.value)
149            if lastline.endswith('()') or lastline.endswith('+)') or \
150                lastline.endswith('-)') or lastline.endswith('*)') or \
151                lastline.endswith('/)') or lastline.endswith(' )'):
152                lastline = lastline[:-1] + text +')'
153            else:
154                lastline += text
155            codearea.sniptext.value = allbutlastline+lastline
156        whichcolumn.value = 'Choose column to insert.'
157        pass
158
159    whichcolumn.observe(column_insert, names='value')
160    step3instr = richLabel(
161        value='Add the calculation to the right hand side of the = using the '
162              'menus to insert columns, math operations or functions. ' \
163              'Your choices will be appended to the end of the last line ' \
164              'or inserted within the last set of parentheses. You can also' \
165              ' manually edit the expression.')
166    oplst = ['Choose an operation to insert.', '+', '-', '*', '/', '**',
167             'exp()', 'log10()', 'ln()', 'sqrt()', 'sin()', 'cos()',
168             'tan()', 'cot()', 'asin()', 'acos()', 'atan()', 'acot()']
169    whichop = Dropdown(options=oplst,
170                       description='Operation: ')
171
172    def op_insert(change):
173        need_numpy = False
174        np_list = ['exp()', 'log10()', 'ln()', 'sqrt()', 'sin()', 'cos()',
175                   'tan()', 'cot()', 'asin()', 'acos()', 'atan()',
176                   'acot()']
177        op = change['new']
178        if op == 'Choose an operation to insert.':
179            return
180        if op in np_list:
181            need_numpy = True
182            if op == 'ln()':
183                op = 'log()'
184            op = 'np.' + op
185        else:
186            op = ' ' + op + ' '
187        if JPSLUtils.notebookenv == 'NBClassic':
188            select_containing_cell('newcolGUI')
189            insert_text_into_next_cell(op)
190            if need_numpy:
191                move_cursor_in_current_cell(-1)
192        else:
193            allbutlastline, lastline = split_to_all_but_last_and_last(
194                codearea.sniptext.value)
195            if lastline.endswith('()') or lastline.endswith('+)') or \
196                lastline.endswith('-)') or lastline.endswith('*)') or \
197                lastline.endswith('/)') or lastline.endswith(' )') or \
198                lastline.endswith('])'):
199                lastline = lastline[:-1] + op +')'
200            else:
201                lastline += op
202            codearea.sniptext.value = allbutlastline+lastline
203        whichop.value = 'Choose an operation to insert.'
204        pass
205
206    whichop.observe(op_insert, names='value')
207
208    step3drops = HBox(children=[whichcolumn, whichop])
209    step3 = VBox(children=[step3instr, step3drops])
210
211    # Step 4
212    step4instr = richLabel(
213        value = 'Carefully check the expression for typos:' \
214            '<ul><li>Check that parentheses, brackets or braces are properly ' \
215              'paired.</li>' \
216            '<li>Check that all double and single quotes are also ' \
217              'properly paired.</li>' \
218            '<li>Check that all function calls are prefaced by ' \
219              'an <code>np.</code>.</li></ul>' \
220            'Uncheck "Display updated data set", if you do not wish to ' \
221                'display a summary of the updated data set. ' \
222            '<span style="color:red;">Click \'OK\' to do final code updates. ' \
223            '</span>In the classic Jupyter notebook this button will also ' \
224            'run the code and clear this GUI from the notebook.'
225    )
226    show_updated_df_box = Checkbox(description='Show updated data set.',
227                                   value=True,
228                                   layout=Layout(left='-90px'))
229    gen_col_but = Button(description='      OK      ')
230
231    def run_new_col_decl(change):
232        from IPython.display import display, HTML
233        from IPython.display import Javascript as JS
234        # if show updated dataframe is checked append dataframe name as last line.
235        if show_updated_df_box.value == True:
236            text = '# Display summary of updated data set.\n'
237            text += 'display('+friendly_to_globalname[whichframe.value]+')'
238            if JPSLUtils.notebookenv == 'NBClassic':
239                select_containing_cell('newcolGUI')
240                select_cell_immediately_below()
241                insert_newline_at_end_of_current_cell(text)
242            else:
243                allbutlastline, lastline = split_to_all_but_last_and_last(
244                    codearea.sniptext.value)
245                if lastline == '' or lastline == '\n':
246                    codearea.sniptext.value = allbutlastline + '\n' + text
247                else:
248                    codearea.sniptext.value = allbutlastline + lastline + \
249                                              '\n\n' + text
250
251        # run composed operation
252        if JPSLUtils.notebookenv == 'NBClassic':
253            select_containing_cell('newcolGUI')
254            select_cell_immediately_below()
255            display(JS('Jupyter.notebook.get_selected_cell().execute()'))
256            select_containing_cell('newcolGUI')
257            delete_selected_cell()
258        pass
259
260    gen_col_but.on_click(run_new_col_decl)
261    step4act = VBox(children=[show_updated_df_box, gen_col_but])
262    step4 = HBox(children=[step4instr, step4act])
263
264    steps = Tab(children=[step1, step2, step3, step4])
265    steps.set_title(0, 'Step 1')
266    steps.set_title(1, 'Step 2')
267    steps.set_title(2, 'Step 3')
268    steps.set_title(3, 'Step 4')
269    
270    output = Output()
271    codearea = build_run_snip_widget(importstr, output)
272
273    with output:
274        display(HTML(
275        "<h3 id ='newcolGUI' style='text-align:center;'>Pandas New Calculated "
276        "Column "
277        "Composer</h3>"))
278        display(steps)
279    if JPSLUtils.notebookenv == 'NBClassic':
280        display(output)
281        select_containing_cell('newcolGUI')
282        new_cell_immediately_below()
283        select_containing_cell('newcolGUI')
284        replace_text_of_next_cell(importstr)
285    else:
286        with output:
287            display(codearea)
288        display(output)
289    pass
def new_pandas_column_GUI(df_info=None, show_text_col=False, **kwargs):
  2def new_pandas_column_GUI(df_info=None, show_text_col = False, **kwargs):
  3    """
  4    If passed no parameters this will look for all the dataframes in the user
  5    namespace and make them available for adding a new column to. Once a
  6    dataframe is chosen only the numerical columns from that dataframe will
  7    be available for inclusion in the new column expression.
  8
  9    If you wish to allow only certain dataframes or have them show up as
 10    user friendly names in the menus provide that information in the first
 11    paramater df_info.
 12
 13    To allow inclusion of text columns pass True for show_text_col.
 14
 15    :param bool show_text_col: (default = False). When True columns
 16    containing text will be shown.
 17
 18    :param list df_info: List of Lists [[object,globalname,
 19    userfriendly]],..]
 20      * object -- pandas.DataFrame
 21      * globalname -- string name of the object in the user global name space.
 22      * userfriendly -- string name to display for user selection.
 23      
 24    :keyword bool findframes: default = True. If set to false and dataframes
 25    are passed in dfs_info, will not search for dataframes in the user
 26    namespace.
 27    """
 28
 29    from ipywidgets import Layout, Box, HBox, VBox, GridBox, Tab, \
 30        Dropdown, Label, Text, Textarea, Button, Checkbox, Output
 31    from ipywidgets import HTML as richLabel
 32    from IPython.display import display, HTML
 33    from IPython import get_ipython
 34    from JPSLUtils.utils import new_cell_immediately_below,\
 35        select_cell_immediately_below, move_cursor_in_current_cell, \
 36        insert_text_into_next_cell, insert_text_at_beginning_of_current_cell, \
 37        insert_newline_at_end_of_current_cell, select_containing_cell, \
 38        delete_selected_cell, replace_text_of_next_cell
 39
 40    from .utils import find_pandas_dataframe_names, build_run_snip_widget
 41    from IPython import get_ipython
 42    global_dict = get_ipython().user_ns
 43    JPSLUtils = global_dict["JPSLUtils"]
 44    dfs_info = []
 45    if isinstance(df_info,list):
 46        for k in df_info:
 47            dfs_info.append(k)
 48    findframes = kwargs.pop('findframes',True)
 49    if findframes:
 50        for k in find_pandas_dataframe_names():
 51            dfs_info.append([global_dict[k],k,k])
 52    friendly_to_globalname = {k[2]:k[1] for k in dfs_info}
 53    friendly_to_object = {k[2]:k[0] for k in dfs_info}
 54
 55    #### Define GUI Elements ####
 56
 57    importstr = '# CODE BLOCK generated using new_pandas_column_GUI().\n' \
 58                '# See https://jupyterphysscilab.github.io/' \
 59                'jupyter_Pandas_GUI.\n' \
 60                '# Imports (no effect if already imported)\n' \
 61                'import numpy as np\n'
 62    allbutlastline = importstr
 63    lastline = ''
 64
 65    def split_to_all_but_last_and_last(text):
 66        all_but_last = ''
 67        last = ''
 68        lines = text.split('\n')
 69        for k in range(len(lines)):
 70            if k < len(lines) - 1:
 71                all_but_last += lines[k] + '\n'
 72            else:
 73                last = lines[k]
 74        return all_but_last, last
 75            
 76    # DataFrame Choice (Step 1)
 77    step1instr = Label(value = 'Select the DataFrame to work with.')
 78    tempopts = []
 79    tempopts.append('Choose')
 80    for k in dfs_info:
 81        tempopts.append(k[2])
 82    whichframe = Dropdown(options=tempopts,
 83                                description='DataFrame: ',)
 84
 85    def update_columns(change):
 86        df = friendly_to_object[change['new']]
 87        tempcols = df.columns.values
 88        tempopt = ['Choose column to insert.']
 89        for k in tempcols:
 90            if show_text_col:
 91                tempopt.append(k)
 92            else:
 93                if df[k].dtype != 'O':
 94                    tempopt.append(k)
 95        whichcolumn.options = tempopt
 96        pass
 97    whichframe.observe(update_columns, names='value')
 98    step1 = VBox(children=[step1instr, whichframe])
 99
100    # Step 2
101    newname = Text(placeholder='Type name for new column.')
102    step2instr = richLabel(
103        value='Pick a name for the new column. The expression will be ' \
104              'built in the cell (textbox) below. Click the "Insert" button ' \
105              'when you are satisfied with the name.')
106    insertname = Button(description="Insert")
107
108    def do_insertname(change):
109        framename = friendly_to_globalname[whichframe.value]
110        codestr = framename + '[\'' + newname.value + '\'] = '
111        if JPSLUtils.notebookenv == 'NBClassic':
112            select_containing_cell('newcolGUI')
113            select_cell_immediately_below()
114            insert_newline_at_end_of_current_cell(codestr)
115        else:
116            allbutlastline, lastline = split_to_all_but_last_and_last(
117                codearea.sniptext.value)
118            if lastline == '' or lastline == '\n':
119                codearea.sniptext.value = allbutlastline + '\n' + codestr
120            else:
121                if lastline.endswith('\n'):
122                    codearea.sniptext.value = allbutlastline + lastline + codestr
123                else:
124                    codearea.sniptext.value = allbutlastline + lastline + '\n' + \
125                                   codestr
126        pass
127
128    insertname.on_click(do_insertname)
129
130    step2 = VBox(children=[step2instr, HBox(children=[newname,
131                                           insertname])])
132
133    # Step 3
134    whichcolumn = Dropdown(options=['Choose column to insert.'],
135                           description='Column: ',
136                           )
137
138    def column_insert(change):
139        col = change['new']
140        if col == 'Choose column to insert.':
141            return
142        framename = friendly_to_globalname[whichframe.value]
143        text = framename + '[\'' + col + '\']'
144        if JPSLUtils.notebookenv == 'NBClassic':
145            select_containing_cell('newcolGUI')
146            insert_text_into_next_cell(text)
147        else:
148            allbutlastline, lastline = split_to_all_but_last_and_last(
149                codearea.sniptext.value)
150            if lastline.endswith('()') or lastline.endswith('+)') or \
151                lastline.endswith('-)') or lastline.endswith('*)') or \
152                lastline.endswith('/)') or lastline.endswith(' )'):
153                lastline = lastline[:-1] + text +')'
154            else:
155                lastline += text
156            codearea.sniptext.value = allbutlastline+lastline
157        whichcolumn.value = 'Choose column to insert.'
158        pass
159
160    whichcolumn.observe(column_insert, names='value')
161    step3instr = richLabel(
162        value='Add the calculation to the right hand side of the = using the '
163              'menus to insert columns, math operations or functions. ' \
164              'Your choices will be appended to the end of the last line ' \
165              'or inserted within the last set of parentheses. You can also' \
166              ' manually edit the expression.')
167    oplst = ['Choose an operation to insert.', '+', '-', '*', '/', '**',
168             'exp()', 'log10()', 'ln()', 'sqrt()', 'sin()', 'cos()',
169             'tan()', 'cot()', 'asin()', 'acos()', 'atan()', 'acot()']
170    whichop = Dropdown(options=oplst,
171                       description='Operation: ')
172
173    def op_insert(change):
174        need_numpy = False
175        np_list = ['exp()', 'log10()', 'ln()', 'sqrt()', 'sin()', 'cos()',
176                   'tan()', 'cot()', 'asin()', 'acos()', 'atan()',
177                   'acot()']
178        op = change['new']
179        if op == 'Choose an operation to insert.':
180            return
181        if op in np_list:
182            need_numpy = True
183            if op == 'ln()':
184                op = 'log()'
185            op = 'np.' + op
186        else:
187            op = ' ' + op + ' '
188        if JPSLUtils.notebookenv == 'NBClassic':
189            select_containing_cell('newcolGUI')
190            insert_text_into_next_cell(op)
191            if need_numpy:
192                move_cursor_in_current_cell(-1)
193        else:
194            allbutlastline, lastline = split_to_all_but_last_and_last(
195                codearea.sniptext.value)
196            if lastline.endswith('()') or lastline.endswith('+)') or \
197                lastline.endswith('-)') or lastline.endswith('*)') or \
198                lastline.endswith('/)') or lastline.endswith(' )') or \
199                lastline.endswith('])'):
200                lastline = lastline[:-1] + op +')'
201            else:
202                lastline += op
203            codearea.sniptext.value = allbutlastline+lastline
204        whichop.value = 'Choose an operation to insert.'
205        pass
206
207    whichop.observe(op_insert, names='value')
208
209    step3drops = HBox(children=[whichcolumn, whichop])
210    step3 = VBox(children=[step3instr, step3drops])
211
212    # Step 4
213    step4instr = richLabel(
214        value = 'Carefully check the expression for typos:' \
215            '<ul><li>Check that parentheses, brackets or braces are properly ' \
216              'paired.</li>' \
217            '<li>Check that all double and single quotes are also ' \
218              'properly paired.</li>' \
219            '<li>Check that all function calls are prefaced by ' \
220              'an <code>np.</code>.</li></ul>' \
221            'Uncheck "Display updated data set", if you do not wish to ' \
222                'display a summary of the updated data set. ' \
223            '<span style="color:red;">Click \'OK\' to do final code updates. ' \
224            '</span>In the classic Jupyter notebook this button will also ' \
225            'run the code and clear this GUI from the notebook.'
226    )
227    show_updated_df_box = Checkbox(description='Show updated data set.',
228                                   value=True,
229                                   layout=Layout(left='-90px'))
230    gen_col_but = Button(description='      OK      ')
231
232    def run_new_col_decl(change):
233        from IPython.display import display, HTML
234        from IPython.display import Javascript as JS
235        # if show updated dataframe is checked append dataframe name as last line.
236        if show_updated_df_box.value == True:
237            text = '# Display summary of updated data set.\n'
238            text += 'display('+friendly_to_globalname[whichframe.value]+')'
239            if JPSLUtils.notebookenv == 'NBClassic':
240                select_containing_cell('newcolGUI')
241                select_cell_immediately_below()
242                insert_newline_at_end_of_current_cell(text)
243            else:
244                allbutlastline, lastline = split_to_all_but_last_and_last(
245                    codearea.sniptext.value)
246                if lastline == '' or lastline == '\n':
247                    codearea.sniptext.value = allbutlastline + '\n' + text
248                else:
249                    codearea.sniptext.value = allbutlastline + lastline + \
250                                              '\n\n' + text
251
252        # run composed operation
253        if JPSLUtils.notebookenv == 'NBClassic':
254            select_containing_cell('newcolGUI')
255            select_cell_immediately_below()
256            display(JS('Jupyter.notebook.get_selected_cell().execute()'))
257            select_containing_cell('newcolGUI')
258            delete_selected_cell()
259        pass
260
261    gen_col_but.on_click(run_new_col_decl)
262    step4act = VBox(children=[show_updated_df_box, gen_col_but])
263    step4 = HBox(children=[step4instr, step4act])
264
265    steps = Tab(children=[step1, step2, step3, step4])
266    steps.set_title(0, 'Step 1')
267    steps.set_title(1, 'Step 2')
268    steps.set_title(2, 'Step 3')
269    steps.set_title(3, 'Step 4')
270    
271    output = Output()
272    codearea = build_run_snip_widget(importstr, output)
273
274    with output:
275        display(HTML(
276        "<h3 id ='newcolGUI' style='text-align:center;'>Pandas New Calculated "
277        "Column "
278        "Composer</h3>"))
279        display(steps)
280    if JPSLUtils.notebookenv == 'NBClassic':
281        display(output)
282        select_containing_cell('newcolGUI')
283        new_cell_immediately_below()
284        select_containing_cell('newcolGUI')
285        replace_text_of_next_cell(importstr)
286    else:
287        with output:
288            display(codearea)
289        display(output)
290    pass

If passed no parameters this will look for all the dataframes in the user namespace and make them available for adding a new column to. Once a dataframe is chosen only the numerical columns from that dataframe will be available for inclusion in the new column expression.

If you wish to allow only certain dataframes or have them show up as user friendly names in the menus provide that information in the first parameter df_info.

To allow inclusion of text columns pass True for show_text_col.

Parameters
  • bool show_text_col: (default = False). When True columns containing text will be shown.

  • list df_info: List of Lists [[object, globalname, userfriendly], ...]

    • object -- pandas.DataFrame
    • globalname -- string name of the object in the user global name space.
    • userfriendly -- string name to display for user selection.

Keyword bool findframes: (default = True). If set to False and dataframes are passed in df_info, will not search for dataframes in the user namespace.