Python Pandas DataFrame Tips

Saturday, 26 August 2023

https://github.com/dfirst-education/sample/blob/dev/dataframe_create.ipynb

import csv
import time

import pandas as pd

# changing format
# Look up the current display limits, then set them to 50 columns and a
# column width of 100.
display_limits = {"display.max_columns": 50, "display.max_colwidth": 100}
for option_name in display_limits:
    pd.get_option(option_name)  # current value; the result is not stored
for option_name, limit in display_limits.items():
    pd.set_option(option_name, limit)

 

# reading excel file to dataframe
# NOTE: the three reads below are alternatives -- each one rebinds `df`, so
# keep only the one that matches the input format. Paths use forward slashes
# so they resolve on both Windows and POSIX, and must not contain stray
# spaces (a space after the separator points at a different, missing file).
df = pd.read_excel("./input_target.xlsx", index_col=0)  # first column becomes the index

# reading csv file to dataframe
df = pd.read_csv("./input_target.csv", header=0)  # row 0 holds the column names

# reading jsonl file to dataframe
# lines=True: the file is read as one JSON object per line.
df = pd.read_json("./input_target.jsonl", orient="records", lines=True)

 

# showing dataframe
# First two rows (positional slice); displayed only in a notebook/REPL.
df.iloc[:2]

# (number of rows, number of columns)
print(df.shape)

# dtype of each column; displayed only in a notebook/REPL.
df.dtypes

 

# Replacing NaN with Zero 0
df = df.fillna(0)

# Changing column type
# The label on the right-hand side must match the column name exactly; a
# stray leading space (' column1') raises KeyError.
df["column1"] = df["column1"].astype("object")

 

 

# selecting rows with a condition
# Values inside the query string are compared exactly, so they must not carry
# stray whitespace (" value2" would never match "value2").
df_selected_rows = df.query('col1 in ["value1", "value2"]')

# selecting columns
# By label: every row, the listed columns. Labels must match exactly.
df_selected_columns = df.loc[:, ["column1", "column2"]]

# By position (alternative to the line above; rebinds the same name):
# every row, the first two columns.
df_selected_columns = df.iloc[:, [0, 1]]

 

 

# writing dataframe to csv file
df.to_csv("output_target.csv")

# writing dataframe to json file
# Only the listed columns are exported, as a JSON array of row objects.
# Column labels must match exactly; a stray leading space raises KeyError.
df[["column1", "column2", "column3"]].to_json("output_target.json", orient="records")

 

# resetting index
# Modifies df_selected_rows in place; drop=True discards the old index
# instead of keeping it as a new column.
df_selected_rows.reset_index(drop=True, inplace=True)

 

 

# sample of creating dataframe and inserting data with index
# Reads city names from the first column of a tab-delimited file, geocodes
# each one through the Google Maps client, and stores one row per city.
# NOTE: `googlemaps` (third-party Google Maps client) must be imported before
# this snippet runs; it is not imported anywhere in the visible file.
googleapikey = 'API Key'
gmaps = googlemaps.Client(key=googleapikey)

df_location = pd.DataFrame(columns=["city", "lat", "lng"])
i = 0
# newline="" is what the csv module recommends for files handed to csv.reader.
with open("./city-list.csv", "r", encoding="utf-8_sig", newline="") as f:
    reader = csv.reader(f, delimiter='\t')
    for row in reader:
        if not row:
            # Blank line: there is no row[0] to geocode.
            continue
        result = gmaps.geocode(row[0])
        # Reset per row so a failed lookup is stored as missing values rather
        # than reusing the previous city's coordinates (or raising NameError
        # when the very first lookup fails).
        lat = lng = None
        if result:
            location = result[0]["geometry"]["location"]
            lat, lng = location["lat"], location["lng"]
        data = {'city': row[0], 'lat': lat, 'lng': lng}
        df_location.loc[i] = data  # specifying index
        i += 1
        time.sleep(1.0)  # pause one second between geocoding requests

# outer join
# Join df (left) with df2 (right) on "column1", keeping the keys of both.
df3 = df.merge(df2, how="outer", on="column1")