pasztora
9/17/2019 - 9:21 AM

Extracting patterns from a dataframe

# Pull a regex match out of the "Source" column and store it in a "New" column.
# The pattern is four digits followed by a dash, e.g. "1234-". With a single
# capture group and expand=False the result is one-dimensional, so the .str
# accessor can be used on it directly.
digits_dash_anywhere = df["Source"].str.extract("([0-9][0-9][0-9][0-9]-)", expand=False)
df["New"] = digits_dash_anywhere.str.strip()

# Variant: the leading ^ anchors the pattern to the start of the string.
# Note that this assigns to the same "New" column as the snippet above.
digits_dash_at_start = df["Source"].str.extract("(^[0-9][0-9][0-9][0-9]-)", expand=False)
df["New"] = digits_dash_at_start.str.strip()

# Derive a "Year" column from the "Date-Time" column: wrap the column in a
# DatetimeIndex, then read the year component of every entry.
date_time_index = pd.DatetimeIndex(df["Date-Time"])
df["Year"] = date_time_index.year

# Extracting numbers from dataframe column names or just simply a list of strings
# Every name is reduced to its digit characters, which are then parsed together
# as one number, e.g. "Lactose 25" -> 25.0 (and "a1b2" -> 12.0).
lactose_names = df.columns.tolist()
# The digits are joined back into a string before float() is called: on
# Python 3 filter() returns a lazy iterator, so float(filter(...)) raises
# TypeError. The join form behaves the same on Python 2 and Python 3.
# NOTE: a name containing no digit at all still raises ValueError (float("")).
lactose_conc = [  # List storing lactose concentration values
    float("".join(ch for ch in item if ch.isdigit()))
    for item in lactose_names
]