
Commit

setup done
Prakhar authored and Prakhar committed May 15, 2023
1 parent 5a751c8 commit 9a9c8be
Showing 23 changed files with 306 additions and 0 deletions.
Empty file added build/lib/src/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions build/lib/src/get_data.py
@@ -0,0 +1,26 @@
# read the csv from the configured data source
# process it
# return a DataFrame

import os
import yaml
import pandas as pd
import argparse

def read_params(config_path):
    with open(config_path) as yaml_file:
        config = yaml.safe_load(yaml_file)
    return config

def get_data(config_path):
    config = read_params(config_path)
    print(config)
    data_path = config["data_source"]["s3_source"]
    df = pd.read_csv(data_path, sep=",", encoding="utf-8")
    # print(df.head())
    return df

if __name__ == "__main__":
    args = argparse.ArgumentParser()
    args.add_argument("--config", default="params.yaml")
    parsed_args = args.parse_args()
    data = get_data(config_path=parsed_args.config)
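
These scripts are driven by a params.yaml config that is not included in this commit. get_data.py only needs the data_source.s3_source key; a minimal sketch of that section, with a placeholder path, might look like:

data_source:
  s3_source: data_given/winequality.csv   # placeholder: any local path or URL readable by pandas
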
20 changes: 20 additions & 0 deletions build/lib/src/load_data.py
@@ -0,0 +1,20 @@
# read data from the data source
# save it to data/raw for further processing

import os
from get_data import read_params, get_data
import argparse

def loadandsave(config_path):
    config = read_params(config_path)
    df = get_data(config_path)
    # replace spaces in column names with underscores
    new_cols = [cols.replace(" ", "_") for cols in df.columns]
    raw_data_path = config["load_data"]["raw_dataset_csv"]
    df.to_csv(raw_data_path, sep=",", header=new_cols, index=False)
    print(new_cols)

if __name__ == "__main__":
    args = argparse.ArgumentParser()
    args.add_argument("--config", default="params.yaml")
    parsed_args = args.parse_args()
    data = loadandsave(config_path=parsed_args.config)
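
load_data.py additionally expects a load_data section pointing at where the raw copy (with underscored column names) should be written; a sketch with a placeholder path:

load_data:
  raw_dataset_csv: data/raw/winequality_raw.csv   # placeholder output path for the raw copy
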
30 changes: 30 additions & 0 deletions build/lib/src/split_data.py
@@ -0,0 +1,30 @@
# split the raw data into train and test sets
# save them in the data/processed folder

import os
import pandas as pd
import argparse
from sklearn.model_selection import train_test_split
from get_data import read_params

def split_and_saved_data(config_path):
    config = read_params(config_path)
    test_data_path = config["split_data"]["test_path"]
    train_data_path = config["split_data"]["train_path"]
    raw_data_path = config["load_data"]["raw_dataset_csv"]
    split_ratio = config["split_data"]["test_size"]
    random_state = config["base"]["random_state"]
    df = pd.read_csv(raw_data_path, sep=",")
    train, test = train_test_split(
        df,
        test_size=split_ratio,
        random_state=random_state
    )
    train.to_csv(train_data_path, sep=",", index=False)
    test.to_csv(test_data_path, sep=",", index=False)

if __name__ == "__main__":
    args = argparse.ArgumentParser()
    args.add_argument("--config", default="params.yaml")
    parsed_args = args.parse_args()
    data = split_and_saved_data(config_path=parsed_args.config)
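
split_data.py reads its split settings and output paths from the split_data and base sections of the same config; a sketch with placeholder values:

base:
  random_state: 42                                    # placeholder
split_data:
  train_path: data/processed/train_winequality.csv    # placeholder
  test_path: data/processed/test_winequality.csv      # placeholder
  test_size: 0.2                                       # placeholder
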
181 changes: 181 additions & 0 deletions build/lib/src/train_and_evaluate.py
@@ -0,0 +1,181 @@
# Load the train and test files
# Train the algorithm
# Save the metrics and params

import warnings
import sys
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
from get_data import read_params

import argparse
import joblib
import json

def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2

def train_and_evaluate(config_path):
    config = read_params(config_path)
    train_data_path = config["split_data"]["train_path"]
    test_data_path = config["split_data"]["test_path"]
    random_state = config["base"]["random_state"]
    model_dir = config["model_dir"]

    alpha = config["estimators"]["ElasticNet"]["params"]["alpha"]
    l1_ratio = config["estimators"]["ElasticNet"]["params"]["l1_ratio"]

    target = config["base"]["target_col"]

    train = pd.read_csv(train_data_path, sep=",")
    test = pd.read_csv(test_data_path, sep=",")

    train_y = train[target]
    test_y = test[target]

    train_x = train.drop(target, axis=1)
    test_x = test.drop(target, axis=1)

    lr = ElasticNet(alpha=alpha,
                    l1_ratio=l1_ratio,
                    random_state=random_state)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    scores_file = config["reports"]["scores"]
    params_file = config["reports"]["params"]

    with open(scores_file, "w") as f:
        scores = {
            "rmse": rmse,
            "mae": mae,
            "r2": r2
        }
        json.dump(scores, f, indent=4)

    with open(params_file, "w") as f:
        params = {
            "alpha": alpha,
            "l1_ratio": l1_ratio
        }
        json.dump(params, f, indent=4)

    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, "model.joblib")
    # persist the trained model
    joblib.dump(lr, model_path)

if __name__ == "__main__":
    args = argparse.ArgumentParser()
    args.add_argument("--config", default="params.yaml")
    parsed_args = args.parse_args()
    train_and_evaluate(config_path=parsed_args.config)
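
train_and_evaluate.py pulls its hyperparameters and report/model locations from the remaining sections of the same config. A sketch with placeholder values (target_col in particular is an assumption, not something this commit defines):

base:
  target_col: quality          # placeholder: name of the label column
model_dir: saved_models        # placeholder
estimators:
  ElasticNet:
    params:
      alpha: 0.5               # placeholder
      l1_ratio: 0.5            # placeholder
reports:
  scores: report/scores.json   # placeholder
  params: report/params.json   # placeholder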

Empty file added build/lib/tests/__init__.py
Empty file.
Empty file added build/lib/tests/conftest.py
Empty file.
4 changes: 4 additions & 0 deletions build/lib/tests/test_config.py
@@ -0,0 +1,4 @@
def test_generic():
    a = 2
    b = 2
    assert a == b
Binary file added dist/src-0.0.1-py3-none-any.whl
Binary file not shown.
Binary file added dist/src-0.0.1.tar.gz
Binary file not shown.
10 changes: 10 additions & 0 deletions setup.py
@@ -0,0 +1,10 @@
from setuptools import setup, find_packages

setup(
    name="src",
    version="0.0.1",
    description="it's a wine Q package",
    author="Prakhar",
    packages=find_packages(),
    license="MIT"
)
6 changes: 6 additions & 0 deletions src.egg-info/PKG-INFO
@@ -0,0 +1,6 @@
Metadata-Version: 2.1
Name: src
Version: 0.0.1
Summary: it's a wine Q package
Author: Prakhar
License: MIT
14 changes: 14 additions & 0 deletions src.egg-info/SOURCES.txt
@@ -0,0 +1,14 @@
README.md
setup.py
src/__init__.py
src/get_data.py
src/load_data.py
src/split_data.py
src/train_and_evaluate.py
src.egg-info/PKG-INFO
src.egg-info/SOURCES.txt
src.egg-info/dependency_links.txt
src.egg-info/top_level.txt
tests/__init__.py
tests/conftest.py
tests/test_config.py
1 change: 1 addition & 0 deletions src.egg-info/dependency_links.txt
@@ -0,0 +1 @@

2 changes: 2 additions & 0 deletions src.egg-info/top_level.txt
@@ -0,0 +1,2 @@
src
tests
Binary file added src/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
Empty file added tests/__init__.py
Empty file.
Binary file added tests/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file added tests/conftest.py
Empty file.
4 changes: 4 additions & 0 deletions tests/test_config.py
@@ -0,0 +1,4 @@
def test_generic():
    a = 2
    b = 2
    assert a == b
8 changes: 8 additions & 0 deletions tox.ini
@@ -0,0 +1,8 @@
[tox]
envlist = py38
;skipsdist = True

[testenv]
deps = -rrequirements.txt
commands =
    pytest -v
