# Source: https://github.com/dfirst-education/sample/blob/dev/dataframe_create.ipynb
import csv
import time

import pandas as pd
# Display settings: look at the current limits first, then raise them.
# The two bare reads only echo a value in an interactive session (e.g. a
# notebook cell); in a plain script they are evaluated and discarded.
pd.options.display.max_columns
pd.options.display.max_colwidth
# Show up to 50 columns and up to 100 characters per cell when printing.
pd.options.display.max_columns = 50
pd.options.display.max_colwidth = 100
# Three alternative ways to load the input into `df`; each assignment
# replaces the previous one, so keep only the reader that matches the input.
# Paths use "./" (accepted on Windows and POSIX alike) and contain no stray
# blanks: a space after the separator would be part of the file name.

# reading excel file to dataframe (first column becomes the index)
df = pd.read_excel('./input_target.xlsx', index_col=0)
# reading csv file to dataframe (first line is the header row)
df = pd.read_csv('./input_target.csv', header=0)
# reading jsonl file to dataframe (one JSON record per line)
df = pd.read_json('./input_target.jsonl', orient='records', lines=True)
# Quick look at the loaded data: the first two rows, the (rows, columns)
# shape, and the dtype of every column.
# The bare expressions render output only in an interactive session; the
# shape is printed explicitly so it also shows up when run as a script.
df[0:2]
print(df.shape)
df.dtypes
# Replacing NaN with zero (0) in every column
df = df.fillna(0)
# Changing column type: convert 'column1' to the generic object dtype.
# Column labels are whitespace-sensitive, so the label that is read must be
# spelled exactly like the label that is written; otherwise pandas raises
# KeyError or converts a different column.
df['column1'] = df['column1'].astype('object')
# selecting rows with a condition: keep rows whose col1 is one of the listed
# values (the comparison is exact, so the values must not carry extra blanks)
df_selected_rows = df.query('col1 in ["value1", "value2"]')
# selecting columns by label (labels must match the column names exactly)
df_selected_columns = df.loc[:, ["column1", "column2"]]
# selecting columns by position (alternative: first and second column)
df_selected_columns = df.iloc[:, [0, 1]]
# writing dataframe to csv file (the index is written as the first column)
df.to_csv("output_target.csv")
# writing dataframe to json file: only the listed columns, as a JSON array
# with one object per row. Labels must match the column names exactly,
# including whitespace, or the selection raises KeyError.
df[["column1", "column2", "column3"]].to_json("output_target.json", orient='records')
# Renumber the filtered rows 0..n-1; drop=True discards the old index
# instead of keeping it as an extra column.
df_selected_rows = df_selected_rows.reset_index(drop=True)
# sample of creating dataframe and inserting data with index
# Reads city names (first field of each tab-separated line) from
# ./city-list.csv, geocodes each one through the Google Maps client, and
# appends one (city, lat, lng) row per successful lookup to df_location.
# Requires the third-party `googlemaps` package to be imported; its import is
# not shown in this snippet.
googleapikey = 'API Key'  # placeholder: replace with a real API key
gmaps = googlemaps.Client(key=googleapikey)
df_location = pd.DataFrame(columns=["city", "lat", "lng"])
i = 0  # next row label to insert into df_location
# "utf-8_sig" strips a leading byte-order mark if present; newline="" lets
# the csv module handle line endings itself.
with open("./city-list.csv", "r", encoding="utf-8_sig", newline="") as f:
    reader = csv.reader(f, delimiter='\t')
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; there is no city to look up.
            continue
        result = gmaps.geocode(row[0])
        if result:
            # Use the first geocoding hit only.
            lat = result[0]["geometry"]["location"]["lat"]
            lng = result[0]["geometry"]["location"]["lng"]
            data = {'city': row[0], 'lat': lat, 'lng': lng}
            df_location.loc[i] = data  # specifying index
            i += 1
        # Pause after every request to throttle calls to the geocoding service.
        # NOTE(review): the flattened original does not show whether this sleep
        # ran once per row or only after a successful lookup; confirm.
        time.sleep(1.0)
# Outer join of df and df2 on "column1": rows from both frames are kept, and
# cells with no match on the other side are filled with NaN.
df3 = df.merge(df2, how="outer", on="column1")