Convert CSV to Parquet files
In [2]:
Copied!
import os
import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
from google.cloud import storage
from io import StringIO
import graphium
import os
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains the imported graphium module file.
_graphium_module_dir = dirname(abspath(graphium.__file__))
MAIN_DIR = dirname(_graphium_module_dir)
# TODO: create a function to read parquet and test it from GCP storage (put it in path; it should support both gs and local paths; explore the Pandas reader instead of pyarrow.parquet "pq")
def _csv_to_parquet(csv_path, parquet_path):
    """Load the CSV at ``csv_path`` with pandas and write it to ``parquet_path`` as Parquet.

    ``csv_path`` is passed unchanged to ``pd.read_csv`` and ``parquet_path``
    unchanged to ``DataFrame.to_parquet``. Nothing is returned.
    """
    pd.read_csv(csv_path).to_parquet(parquet_path)
# Convert the micro QM9 CSV under MAIN_DIR into a Parquet file written next to it.
_csv_to_parquet(
    MAIN_DIR + '/graphium/data/QM9/micro_qm9.csv',
    MAIN_DIR + '/graphium/data/QM9/micro_qm9.parquet',
)
import os
import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
from google.cloud import storage
from io import StringIO
import graphium
import os
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains the imported graphium module file.
_graphium_module_dir = dirname(abspath(graphium.__file__))
MAIN_DIR = dirname(_graphium_module_dir)
# TODO: create a function to read parquet and test it from GCP storage (put it in path; it should support both gs and local paths; explore the Pandas reader instead of pyarrow.parquet "pq")
def _csv_to_parquet(csv_path, parquet_path):
    """Load the CSV at ``csv_path`` with pandas and write it to ``parquet_path`` as Parquet.

    ``csv_path`` is passed unchanged to ``pd.read_csv`` and ``parquet_path``
    unchanged to ``DataFrame.to_parquet``. Nothing is returned.
    """
    pd.read_csv(csv_path).to_parquet(parquet_path)
# Convert the micro QM9 CSV under MAIN_DIR into a Parquet file written next to it.
_csv_to_parquet(
    MAIN_DIR + '/graphium/data/QM9/micro_qm9.csv',
    MAIN_DIR + '/graphium/data/QM9/micro_qm9.parquet',
)
In [ ]:
Copied!
# TODO: create a function that lets the caller specify whether to read parquet or csv
# TODO: replace all read locations with a call to _read_csv, and make sure to read all files if the path ends with "*"
# def read_table:
# TODO: create a function that lets the caller specify whether to read parquet or csv
# TODO: replace all read locations with a call to _read_csv, and make sure to read all files if the path ends with "*"
# def read_table:
In [1]:
Copied!
# Shell escape: print the directory the notebook is currently running from.
!pwd
!pwd
/home/oleksandr/code/graphium/docs/tutorials/basics
In [3]:
Copied!
import graphium
import os
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains the imported graphium module file.
_graphium_module_dir = dirname(abspath(graphium.__file__))
MAIN_DIR = dirname(_graphium_module_dir)
# Make MAIN_DIR the process working directory.
os.chdir(MAIN_DIR)  # No need for this file
import graphium
import os
from os.path import dirname, abspath
# MAIN_DIR is the parent of the directory that contains the imported graphium module file.
_graphium_module_dir = dirname(abspath(graphium.__file__))
MAIN_DIR = dirname(_graphium_module_dir)
# Make MAIN_DIR the process working directory.
os.chdir(MAIN_DIR)  # No need for this file
Out[3]:
'/home/oleksandr/code/graphium/graphium'