-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Prakhar
authored and
Prakhar
committed
May 15, 2023
1 parent
5a751c8
commit 9a9c8be
Showing
23 changed files
with
306 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# read csv | ||
# process | ||
# return df | ||
|
||
import os | ||
import yaml | ||
import pandas as pd | ||
import argparse | ||
|
||
def read_params(config_path):
    """Load the YAML configuration stored at ``config_path``.

    Args:
        config_path: Path to the YAML parameters file.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding.
    with open(config_path, encoding="utf-8") as yaml_file:
        return yaml.safe_load(yaml_file)
|
||
def get_data(config_path):
    """Load the source dataset named in the configuration file.

    Reads the configuration at ``config_path``, prints it, and loads the CSV
    located at ``config["data_source"]["s3_source"]``.

    Args:
        config_path: Path to the parameters file understood by ``read_params``.

    Returns:
        The DataFrame returned by ``pd.read_csv`` for the configured source.
    """
    params = read_params(config_path)
    print(params)
    source = params["data_source"]["s3_source"]
    return pd.read_csv(source, sep=",", encoding="utf-8")
if __name__ == "__main__":
    # Script entry point: --config selects the parameters file to read.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="params.yaml")
    cli_args = parser.parse_args()
    data = get_data(config_path=cli_args.config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# read data from the data source | ||
# save it in data/raw for further processing | ||
|
||
import os | ||
from get_data import read_params, get_data | ||
import argparse | ||
|
||
def loadandsave(config_path):
    """Write the source dataset to the raw-data CSV with underscored headers.

    The dataset is obtained through ``get_data``; every space in a column name
    is replaced by an underscore, and the result is written (without the
    index) to ``config["load_data"]["raw_dataset_csv"]``. The new column names
    are printed afterwards.

    Args:
        config_path: Path to the parameters file understood by ``read_params``.
    """
    settings = read_params(config_path)
    frame = get_data(config_path)
    renamed = [name.replace(" ", "_") for name in frame.columns]
    destination = settings["load_data"]["raw_dataset_csv"]
    frame.to_csv(destination, sep=",", header=renamed, index=False)
    print(renamed)
|
||
if __name__ == "__main__":
    # Script entry point: --config selects the parameters file to read.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="params.yaml")
    cli_args = parser.parse_args()
    data = loadandsave(config_path=cli_args.config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# split the raw data | ||
# save in data/processed folder | ||
|
||
import os | ||
import pandas as pd | ||
import argparse | ||
from sklearn.model_selection import train_test_split | ||
from get_data import read_params | ||
|
||
def split_and_saved_data(config_path):
    """Split the raw dataset into train and test CSV files.

    Reads the raw CSV at ``config["load_data"]["raw_dataset_csv"]``, splits it
    with ``train_test_split`` using the configured ``test_size`` and
    ``random_state``, and writes both parts (without the index) to the paths
    given under ``config["split_data"]``.

    Args:
        config_path: Path to the parameters file understood by ``read_params``.
    """
    cfg = read_params(config_path)
    split_cfg = cfg["split_data"]
    test_out = split_cfg["test_path"]
    train_out = split_cfg["train_path"]
    raw_csv = cfg["load_data"]["raw_dataset_csv"]
    test_fraction = split_cfg["test_size"]
    seed = cfg["base"]["random_state"]

    raw = pd.read_csv(raw_csv, sep=",")
    train_part, test_part = train_test_split(
        raw, test_size=test_fraction, random_state=seed
    )

    train_part.to_csv(train_out, sep=",", index=False)
    test_part.to_csv(test_out, sep=",", index=False)
|
||
if __name__ == "__main__":
    # Script entry point: --config selects the parameters file to read.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="params.yaml")
    cli_args = parser.parse_args()
    data = split_and_saved_data(config_path=cli_args.config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
# Load train and test files | ||
# Train algo | ||
# | ||
# # Save metrics and params | ||
import warnings | ||
import sys | ||
import pandas as pd | ||
import numpy as np | ||
|
||
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score | ||
import os | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.linear_model import ElasticNet | ||
from urllib.parse import urlparse | ||
from get_data import read_params | ||
|
||
import argparse | ||
import joblib | ||
import json | ||
|
||
def eval_metrics(actual, pred):
    """Return ``(rmse, mae, r2)`` for predictions ``pred`` against ``actual``.

    RMSE is the square root of ``mean_squared_error``; MAE and R2 come from
    ``mean_absolute_error`` and ``r2_score`` respectively.
    """
    mse = mean_squared_error(actual, pred)
    root_mse = np.sqrt(mse)
    abs_err = mean_absolute_error(actual, pred)
    r_squared = r2_score(actual, pred)
    return root_mse, abs_err, r_squared
|
||
def train_and_evaluate(config_path):
    """Train an ElasticNet model, report its test metrics, and persist it.

    Steps:
      1. Read the train/test CSV files named under ``config["split_data"]``.
      2. Separate the ``config["base"]["target_col"]`` column from the
         remaining columns, which are used as features.
      3. Fit ``ElasticNet`` with the configured ``alpha``, ``l1_ratio`` and
         ``random_state``.
      4. Print RMSE / MAE / R2 on the test split and write them, together with
         the hyper-parameters, to the JSON files named under
         ``config["reports"]``.
      5. Save the fitted model to ``<model_dir>/model.joblib``.

    Args:
        config_path: Path to the parameters file understood by ``read_params``.
    """
    config = read_params(config_path)
    train_data_path = config["split_data"]["train_path"]
    test_data_path = config["split_data"]["test_path"]
    random_state = config["base"]["random_state"]
    model_dir = config["model_dir"]

    alpha = config["estimators"]["ElasticNet"]["params"]["alpha"]
    l1_ratio = config["estimators"]["ElasticNet"]["params"]["l1_ratio"]

    target = config["base"]["target_col"]

    train = pd.read_csv(train_data_path, sep=",")
    test = pd.read_csv(test_data_path, sep=",")

    train_y = train[target]
    test_y = test[target]

    train_x = train.drop(target, axis=1)
    test_x = test.drop(target, axis=1)

    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=random_state)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print(" RMSE: %s" % rmse)
    print(" MAE: %s" % mae)
    print(" R2 : %s " % r2)

    scores_file = config["reports"]["scores"]
    params_file = config["reports"]["params"]

    with open(scores_file, "w") as f:
        scores = {
            "rmse": rmse,
            "mae": mae,
            "r2": r2,
        }
        json.dump(scores, f, indent=4)

    with open(params_file, "w") as f:
        params = {
            "alpha": alpha,
            "l1_ratio": l1_ratio,
        }
        json.dump(params, f, indent=4)

    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, "model.joblib")

    # Persist the fitted estimator. Previously model_path was computed but
    # nothing was ever written to it, so the trained model was discarded.
    joblib.dump(lr, model_path)
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: --config selects the parameters file to read.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="params.yaml")
    cli_args = parser.parse_args()
    train_and_evaluate(config_path=cli_args.config)
|
||
# load the train and test | ||
# train algo | ||
# save the metrics, params | ||
# import warnings | ||
# import sys | ||
# import pandas as pd | ||
# import numpy as np | ||
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | ||
# from sklearn.model_selection import train_test_split | ||
# import os | ||
# from sklearn.linear_model import ElasticNet | ||
# from get_data import read_params | ||
# import argparse | ||
# import joblib | ||
# import json | ||
# | ||
# | ||
# def eval_metrics(actual, pred): | ||
# rmse = np.sqrt(mean_squared_error(actual, pred)) | ||
# mae = mean_absolute_error(actual, pred) | ||
# r2 = r2_score(actual, pred) | ||
# return rmse, mae, r2 | ||
# | ||
# | ||
# def train_and_evaluate(config_path): | ||
# config = read_params(config_path) | ||
# test_data_path = config["split_data"]["test_path"] | ||
# train_data_path = config["split_data"]["train_path"] | ||
# random_state = config["base"]["random_state"] | ||
# model_dir = config["model_dir"] | ||
# | ||
# alpha = config["estimators"]["ElasticNet"]["params"]["alpha"] | ||
# l1_ratio = config["estimators"]["ElasticNet"]["params"]["l1_ratio"] | ||
# | ||
# target = [config["base"]["target_col"]] | ||
# | ||
# train = pd.read_csv(train_data_path, sep=",") | ||
# test = pd.read_csv(test_data_path, sep=",") | ||
# | ||
# train_y = train[target] | ||
# test_y = test[target] | ||
# | ||
# train_x = train.drop(target, axis=1) | ||
# test_x = test.drop(target, axis=1) | ||
# | ||
# lr = ElasticNet( | ||
# alpha=alpha, | ||
# l1_ratio=l1_ratio, | ||
# random_state=random_state) | ||
# lr.fit(train_x, train_y) | ||
# | ||
# predicted_qualities = lr.predict(test_x) | ||
# | ||
# (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities) | ||
# | ||
# print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio)) | ||
# print(" RMSE: %s" % rmse) | ||
# print(" MAE: %s" % mae) | ||
# print(" R2: %s" % r2) | ||
# | ||
# ##################################################### | ||
# scores_file = config["reports"]["scores"] | ||
# params_file = config["reports"]["params"] | ||
# | ||
# with open(scores_file, "w") as f: | ||
# scores = { | ||
# "rmse": rmse, | ||
# "mae": mae, | ||
# "r2": r2 | ||
# } | ||
# json.dump(scores, f, indent=4) | ||
# | ||
# with open(params_file, "w") as f: | ||
# params = { | ||
# "alpha": alpha, | ||
# "l1_ratio": l1_ratio, | ||
# } | ||
# json.dump(params, f, indent=4) | ||
# ##################################################### | ||
# | ||
# os.makedirs(model_dir, exist_ok=True) | ||
# model_path = os.path.join(model_dir, "model.joblib") | ||
# | ||
# joblib.dump(lr, model_path) | ||
# | ||
# | ||
# if __name__ == "__main__": | ||
# args = argparse.ArgumentParser() | ||
# args.add_argument("--config", default="params.yaml") | ||
# parsed_args = args.parse_args() | ||
# train_and_evaluate(config_path=parsed_args.config) |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
def test_generic():
    """Placeholder test: two equal integers compare equal."""
    left = 2
    right = 2
    assert left == right
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from setuptools import setup, find_packages | ||
|
||
# Package metadata; the package list is supplied by find_packages().
setup(
    name="src",
    version="0.0.1",
    description="its a wine Q package",
    author="Prakhar",
    license="MIT",
    packages=find_packages(),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
Metadata-Version: 2.1 | ||
Name: src | ||
Version: 0.0.1 | ||
Summary: its a wine Q package | ||
Author: Prakhar | ||
License: MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
README.md | ||
setup.py | ||
src/__init__.py | ||
src/get_data.py | ||
src/load_data.py | ||
src/split_data.py | ||
src/train_and_evaluate.py | ||
src.egg-info/PKG-INFO | ||
src.egg-info/SOURCES.txt | ||
src.egg-info/dependency_links.txt | ||
src.egg-info/top_level.txt | ||
tests/__init__.py | ||
tests/conftest.py | ||
tests/test_config.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
src | ||
tests |
Binary file not shown.
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
def test_generic():
    """Placeholder test: two equal integers compare equal."""
    left = 2
    right = 2
    assert left == right
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
[tox] | ||
envlist = py38 | ||
;skipsdist = True | ||
|
||
[testenv] | ||
deps = -rrequirements.txt | ||
commands = | ||
pytest -v |