columns

11/8/2019 - 3:13 PM

import pandas as pd
import numpy as np
df = pd.read_excel("../data/excel-comp-data.xlsx")

df["total"] = df["Jan"] + df["Feb"] + df["Mar"]

add_new_column_assign.py

import pandas as pd
import numpy as np
from xlsxwriter.utility import xl_rowcol_to_cell
df = pd.read_excel("../data/excel-comp-datav2.xlsx")

df = df.assign(total=(df['Jan'] + df['Feb'] + df['Mar']))
df.head()

df = df.assign(quota_pct=(1+(df['total'] - df['quota'])/df['quota']))
df.head()

add_new_column_sum_percent.py

import pandas as pd
 
df = pd.read_excel("../data/sales_transactions.xlsx")
df
 
df.groupby('order')["ext price"].transform('sum')
 
df["Order_Total"] = df.groupby('order')["ext price"].transform('sum')
df["Percent_of_Order"] = df["ext price"] / df["Order_Total"]
df

autofit_columns.py

import xlwings as xw
import numpy as np
import pandas as pd
import datetime as dt
import time
import sys
 
# Fire up a new book in the active Excel instance
wb1 = xw.Book()
# Connects to an unsaved book (looks in all Excel instances)
wb1 = xw.Book('Book1')
sheet = wb1.sheets[0]

# autofit columns and rows based on single Cell
sheet.range('A3').autofit()

# autofit columns based on Range
sheet.range('A1:C3').columns.autofit()

# autofit a whole column
sheet.range('A:A').autofit()

map_column_names.py

# B-073 Tips for Selecting Columns in a DataFrame
# https://pbpython.com/selecting-columns.html

import pandas as pd
import numpy as np
df = pd.read_csv(
    'https://data.cityofnewyork.us/api/views/vfnx-vebw/rows.csv?accessType=DOWNLOAD&bom=true&format=true'
)
Build a mapping list so we can see the index of all the columns

col_mapping = [f"{c[0]}:{c[1]}" for c in enumerate(df.columns)]

col_mapping

['0:X',
 '1:Y',
 '2:Unique Squirrel ID',
 '3:Hectare',
 '4:Shift',
 '5:Date',
 '6:Hectare Squirrel Number',
 '7:Age',
 '8:Primary Fur Color',
 '9:Highlight Fur Color',
 '10:Combination of Primary and Highlight Color',
 '11:Color notes',
 '12:Location',
 '13:Above Ground Sighter Measurement',
 '14:Specific Location',
 '15:Running',
 '16:Chasing',
 '17:Climbing',
 '18:Eating',
 '19:Foraging',
 '20:Other Activities',
 '21:Kuks',
 '22:Quaas',
 '23:Moans',
 '24:Tail flags',
 '25:Tail twitches',
 '26:Approaches',
 '27:Indifferent',
 '28:Runs from',
 '29:Other Interactions',
 '30:Lat/Long',
 '31:Zip Codes',
 '32:Community Districts',
 '33:Borough Boundaries',
 '34:City Council Districts',
 '35:Police Precincts']
 
# We can also build a dictionary

col_mapping_dict = {c[0]:c[1] for c in enumerate(df.columns)}
col_mapping_dict
{0: 'X',
 1: 'Y',
 2: 'Unique Squirrel ID',
 3: 'Hectare',
 4: 'Shift',
 5: 'Date',
 6: 'Hectare Squirrel Number',
 7: 'Age',
 8: 'Primary Fur Color',
 9: 'Highlight Fur Color',
 10: 'Combination of Primary and Highlight Color',
 11: 'Color notes',
 12: 'Location',
 13: 'Above Ground Sighter Measurement',
 14: 'Specific Location',
 15: 'Running',
 16: 'Chasing',
 17: 'Climbing',
 18: 'Eating',
 19: 'Foraging',
 20: 'Other Activities',
 21: 'Kuks',
 22: 'Quaas',
 23: 'Moans',
 24: 'Tail flags',
 25: 'Tail twitches',
 26: 'Approaches',
 27: 'Indifferent',
 28: 'Runs from',
 29: 'Other Interactions',
 30: 'Lat/Long',
 31: 'Zip Codes',
 32: 'Community Districts',
 33: 'Borough Boundaries',
 34: 'City Council Districts',
 35: 'Police Precincts'}
Use iloc to select just the second column (Unique Squirrel ID)

https://nbviewer.jupyter.org/github/chris1610/pbpython/blob/master/notebooks/Selecting_Columns_in_DataFrame.ipynb

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

columns