-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
35 lines (26 loc) · 1.46 KB
/
main.py
File metadata and controls
35 lines (26 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from Ingesters.ingester import Mongo_Csv_Pandas_Data_Ingester, Mongo_Dask_Data_Ingester
import dask.dataframe as dd
import argparse
import json
def csv_example(mongo_address, mongo_port, db_name, collection_name, csv_location):
    """Run the pandas/CSV ingestion example.

    Builds a ``Mongo_Csv_Pandas_Data_Ingester`` from the connection settings,
    asks it to create a data frame from ``csv_location`` and hands that data
    frame back to the same ingester for ingestion.

    Args:
        mongo_address: Forwarded to the ingester as ``mongo_address``.
        mongo_port: Forwarded to the ingester as ``mongo_port``.
        db_name: Forwarded to the ingester as ``db_name``.
        collection_name: Forwarded to the ingester as ``collection_name``.
        csv_location: Passed to ``create_data_frame`` as ``data_folder``
            (presumably a folder containing CSV files -- confirm against
            the ingester implementation).
    """
    connection_settings = {
        "mongo_address": mongo_address,
        "mongo_port": mongo_port,
        "db_name": db_name,
        "collection_name": collection_name,
    }
    csv_ingester = Mongo_Csv_Pandas_Data_Ingester(**connection_settings)

    # The ingester both builds the data frame and consumes it.
    frame = csv_ingester.create_data_frame(data_folder=csv_location)
    csv_ingester.ingest_data(data_frame=frame)
def dask_example(mongo_address, mongo_port, db_name, collection_name, csv_location):
    """Run the dask ingestion example.

    Builds a ``Mongo_Dask_Data_Ingester`` from the connection settings, reads
    ``csv_location`` with ``dask.dataframe.read_csv`` and passes the resulting
    dask data frame to the ingester.

    Args:
        mongo_address: Forwarded to the ingester as ``mongo_address``.
        mongo_port: Forwarded to the ingester as ``mongo_port``.
        db_name: Forwarded to the ingester as ``db_name``.
        collection_name: Forwarded to the ingester as ``collection_name``.
        csv_location: Path handed directly to ``dd.read_csv``.
    """
    connection_settings = dict(
        mongo_address=mongo_address,
        mongo_port=mongo_port,
        db_name=db_name,
        collection_name=collection_name,
    )
    # The ingester is created before the CSV is read, so any failure while
    # constructing it surfaces first.
    dask_ingester = Mongo_Dask_Data_Ingester(**connection_settings)
    dask_ingester.ingest_data(data_frame=dd.read_csv(csv_location))
def read_config(config_file_location: str):
    """Load and return the parsed contents of a JSON configuration file.

    Args:
        config_file_location: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by convention; being explicit avoids depending on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(config_file_location, encoding="utf-8") as config_file:
        return json.load(config_file)
if __name__ == "__main__":
    # Dispatch table: the "function" value of each config entry selects one of
    # these callables.
    class_mapper = {
        "dask_example": dask_example,
        "csv_example": csv_example,
    }

    # NOTE(review): the description talks about downloading census bulk data,
    # while this script dispatches ingestion examples -- confirm the wording.
    parser = argparse.ArgumentParser(description="Tool to mass download data from http://census.ire.org/data/bulkdata.html")
    parser.add_argument("--config_file", type=str, required=True)
    args = parser.parse_args()

    config = read_config(config_file_location=args.config_file)

    # Each entry under "example" names a function and supplies its keyword
    # arguments under "function_variables".
    for download in config['example']:
        function_name = download['function']
        if function_name not in class_mapper:
            # Exit with a readable message instead of a bare KeyError.
            parser.error(
                f"Unknown function {function_name!r} in config; "
                f"expected one of: {', '.join(sorted(class_mapper))}"
            )
        download_instance = class_mapper[function_name]
        download_instance(**download['function_variables'])