-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy path: 22-1 Simple ETL example
1 lines (1 loc) · 1.29 KB
/
22-1 Simple ETL example
1
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "22-1 Simple ETL example",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyNK5QpARNPug6kzE5xFqZeK"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "j4Atmk1JmA7s"
      },
      "source": [
        "# Simple ETL example\n",
        "# author: Gressling, T\n",
        "# license: MIT License # code: github.com/gressling/examples\n",
        "# activity: single example # index: 22-1 "
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HhybSHqGmGsk"
      },
      "source": [
        "import pandas as pd"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# CONFIGURATION - everything a reader might want to change lives here.\n",
        "INPUT_CSV = 'logP_dataset.csv'           # read without a header row (see STEP 1)\n",
        "OUTPUT_CSV = 'logP_dataset-filter.csv'   # written by STEP 3\n",
        "\n",
        "# Structure string of the example compound 1-bromo-1-methylsulfanylpropane\n",
        "# https://pubchem.ncbi.nlm.nih.gov/compound/88089970\n",
        "TARGET_SMILES = 'CCC(SC)Br'"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hd8N0OOGmIIp"
      },
      "source": [
        "# STEP 1 - EXTRACT\n",
        "# header=None: the first line of the file is treated as data, so pandas\n",
        "# labels the columns with the integers 0, 1, ...\n",
        "logp_raw = pd.read_csv(INPUT_CSV, low_memory=True, header=None)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FjmaU3dUmSJq"
      },
      "source": [
        "# STEP 2 - TRANSFORM (here: Filter)\n",
        "# Keep only the rows whose first column equals TARGET_SMILES.\n",
        "# The result gets its own name instead of overwriting the raw frame, so this\n",
        "# cell can be re-run (or pointed at another compound) without re-reading the file.\n",
        "# NOTE(review): presumably column 0 holds the SMILES strings - confirm against the dataset.\n",
        "logp_filtered = logp_raw.loc[logp_raw[0] == TARGET_SMILES, :]\n",
        "logp_filtered"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UnzoUQzBmUhq"
      },
      "source": [
        "# STEP 3 - LOAD\n",
        "# header=False: the input was read without a header row, so the integer column\n",
        "# labels assigned by read_csv must not be written out as a spurious first line.\n",
        "logp_filtered.to_csv(OUTPUT_CSV, sep=',', encoding='utf-8', index=False, header=False)"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}