Convert CSV to Parquet files
In [1]:
Copied!
import pandas as pd
import graphium
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains graphium's package file.
MAIN_DIR = dirname(
    dirname(
        abspath(graphium.__file__)
    )
)
# TODO: create a function to read Parquet and test it from GCP storage (pass the location as a path; it should support both gs and local paths; explore the pandas function instead of parquet "pq")
def _csv_to_parquet(csv_path, parquet_path):
    """Read the CSV file at ``csv_path`` and write it to ``parquet_path`` as Parquet.

    The file is loaded with ``pd.read_csv`` and written with
    ``DataFrame.to_parquet``, both using their default options.

    Args:
        csv_path: Location of the CSV file to read.
        parquet_path: Location where the Parquet file is written.

    Returns:
        None.
    """
    pd.read_csv(csv_path).to_parquet(parquet_path)
# Convert the micro QM9 CSV under MAIN_DIR into a Parquet file in the same directory.
_csv_to_parquet(
    csv_path=MAIN_DIR + '/graphium/data/QM9/micro_qm9.csv',
    parquet_path=MAIN_DIR + '/graphium/data/QM9/micro_qm9.parquet',
)
import pandas as pd
import graphium
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains graphium's package file.
MAIN_DIR = dirname(
    dirname(
        abspath(graphium.__file__)
    )
)
# TODO: create a function to read Parquet and test it from GCP storage (pass the location as a path; it should support both gs and local paths; explore the pandas function instead of parquet "pq")
def _csv_to_parquet(csv_path, parquet_path):
    """Read the CSV file at ``csv_path`` and write it to ``parquet_path`` as Parquet.

    The file is loaded with ``pd.read_csv`` and written with
    ``DataFrame.to_parquet``, both using their default options.

    Args:
        csv_path: Location of the CSV file to read.
        parquet_path: Location where the Parquet file is written.

    Returns:
        None.
    """
    pd.read_csv(csv_path).to_parquet(parquet_path)
# Convert the micro QM9 CSV under MAIN_DIR into a Parquet file in the same directory.
_csv_to_parquet(
    csv_path=MAIN_DIR + '/graphium/data/QM9/micro_qm9.csv',
    parquet_path=MAIN_DIR + '/graphium/data/QM9/micro_qm9.parquet',
)
In [2]:
Copied!
# TODO: create a function that lets the caller specify whether to read Parquet or CSV
# TODO: replace all call sites with a call to _read_csv, and make sure it reads all files if the path ends with "*"
# def read_table:
# TODO: create a function that lets the caller specify whether to read Parquet or CSV
# TODO: replace all call sites with a call to _read_csv, and make sure it reads all files if the path ends with "*"
# def read_table:
In [2]:
Copied!
!pwd
!pwd
/nethome/andyh/graphium/docs/tutorials/feature_processing
In [3]:
Copied!
import graphium
import os
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains graphium's package file.
MAIN_DIR = os.path.dirname(os.path.dirname(os.path.abspath(graphium.__file__)))

# Make MAIN_DIR the working directory for the cells that follow.
# NOTE(review): the original remark on this line was "No need for this file" --
# presumably the chdir is not required by this notebook; confirm before removing.
os.chdir(MAIN_DIR)
import graphium
import os
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains graphium's package file.
MAIN_DIR = os.path.dirname(os.path.dirname(os.path.abspath(graphium.__file__)))

# Make MAIN_DIR the working directory for the cells that follow.
# NOTE(review): the original remark on this line was "No need for this file" --
# presumably the chdir is not required by this notebook; confirm before removing.
os.chdir(MAIN_DIR)
In [ ]:
Copied!