Skip to content

Commit

Permalink
Testing Azure Function
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul committed May 16, 2024
1 parent 8c740f1 commit d089631
Show file tree
Hide file tree
Showing 19 changed files with 163 additions and 0 deletions.
92 changes: 92 additions & 0 deletions function_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import json
import os
import pickle
import re

import azure.functions as func
import numpy as np
import pandas as pd
import tensorflow as tf
from flask import Flask, request
from pythainlp import word_tokenize

def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    ``pickle.load`` can execute arbitrary code embedded in the file, so this
    must only ever be pointed at trusted artifacts.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Flask application object; the HTTP route below is registered on it.
flask_app = Flask(__name__)

# Pickled estimators plus the vectorizer whose output is fed to both of them.
logistic_regression_model = _load_pickle('sentiment_analysis/models/logistic_regression.pkl')
naive_bayes_model = _load_pickle('sentiment_analysis/models/naive_bayes.pkl')
sklearn_vectorizer = _load_pickle('sentiment_analysis/models/sklearn_vectorizer.pkl')

# Keras models. Each network is paired with its own text-vectorizer model;
# the networks are loaded with compile=False.
lstm_model = tf.keras.models.load_model(
    'sentiment_analysis/models/lstm.h5',
    compile=False,
)
lstm_vectorizer = tf.keras.models.load_model(
    'sentiment_analysis/models/tf_vectorizer',
)
transformers_vectorizer = tf.keras.models.load_model(
    'sentiment_analysis/models/tf_vectorizer_t',
)
transformers_model = tf.keras.models.load_model(
    'sentiment_analysis/models/transformers',
    compile=False,
)

# Number of rows passed to the Keras models per forward pass.
batch_size = 32


# Matches any single character that is neither in the Thai Unicode block
# (U+0E00-U+0E7F) nor an ASCII space. Compiled once at import time.
_NON_THAI_OR_SPACE = re.compile(r"[^\u0E00-\u0E7F ]")


def filter_thai(text):
    """Return *text* with everything except Thai characters and spaces removed.

    The earlier implementation collected the matches of
    ``[^\\u0E00-\\u0E7F ]|^'|'$|''`` into a list and then tested each character
    of *text* for membership in that list, which is quadratic in the input
    length. The quote alternatives could never contribute anything because
    the leading character class already matches ``'``, so a single
    substitution with the character class yields exactly the same result in
    linear time.

    Args:
        text: The string to clean.

    Returns:
        A new string containing only the Thai-block characters and spaces of
        *text*, in their original order.
    """
    return _NON_THAI_OR_SPACE.sub('', text)


def tokenize_text(x):
    """Clean *x* with ``filter_thai``, tokenize it, and re-join the tokens.

    Tokens that are empty once their spaces are stripped out are dropped;
    the remaining tokens are joined with a single space.
    """
    tokens = word_tokenize(filter_thai(x))
    kept = [token for token in tokens if token.replace(' ', '')]
    return ' '.join(kept)


# Individual model names accepted in 'model_choice'; 'Vote' selects all of them.
_MODEL_NAMES = ('Transformers', 'LSTM', 'Logistic Regression', 'Naive Bayes')


def _predict_in_batches(model, vectorizer, frame):
    """Run *model* on *frame* in slices of ``batch_size`` rows.

    Args:
        model: Callable applied to the vectorizer output; its result must
            expose ``.numpy()``.
        vectorizer: Callable applied to each DataFrame slice.
        frame: DataFrame holding the rows to score.

    Returns:
        The per-batch outputs concatenated along axis 0, as nested lists.
        An empty frame yields ``[]`` without calling the model.
    """
    total = len(frame.index)
    if total == 0:
        return []
    # Stepping by batch_size never produces an empty trailing slice, unlike
    # iterating over ``1 + total // batch_size`` batches when total is an
    # exact multiple of batch_size.
    outputs = [
        model(vectorizer(frame.iloc[start:start + batch_size])).numpy()
        for start in range(0, total, batch_size)
    ]
    return np.concatenate(outputs, axis=0).tolist()


@flask_app.get("/return_http")
def run_ai():
    """Score the submitted texts with the requested sentiment model(s).

    The request must carry an ``aikey`` header equal to the ``AIKEY``
    environment variable and a JSON body with two fields:

    * ``json_data``: a JSON string that decodes to a mapping convertible to a
      DataFrame whose text column is ``'0'`` and whose index labels are the
      strings ``'0'``, ``'1'``, ...
    * ``model_choice``: one of ``'Transformers'``, ``'LSTM'``,
      ``'Logistic Regression'``, ``'Naive Bayes'``, or ``'Vote'`` to run all
      four.

    Returns:
        ``(body, status)``:
        * 201 with ``{'predictions': {...}, 'message': <GPU device list>}``
        * 401 when the key header is missing or wrong
        * 415 when the body is not JSON
        * 400 for an unknown ``model_choice`` or any processing error
    """
    # NOTE(review): the route is registered for GET although it reads a JSON
    # body, and success is reported as 201; both are kept so existing callers
    # continue to work.
    import hmac  # local import keeps this block self-contained

    supplied_key = request.headers.get('aikey')
    # A missing header is rejected explicitly rather than raising a key
    # error, and the comparison is constant-time. Encoding to bytes lets
    # compare_digest accept non-ASCII header values.
    if supplied_key is None or not hmac.compare_digest(
            supplied_key.encode('utf-8'), os.environ['AIKEY'].encode('utf-8')):
        return {'error': 'Wrong key'}, 401

    if not request.is_json:
        return {"error": "Request must be JSON"}, 415

    try:
        json_body = request.get_json()
        data = json_body['json_data']
        model_choice = json_body['model_choice']

        if model_choice == 'Vote':
            predictions = dict.fromkeys(_MODEL_NAMES)
        elif model_choice in _MODEL_NAMES:
            predictions = {model_choice: None}
        else:
            # Previously an unknown name produced a null prediction with a
            # success status; report it to the caller instead.
            return {"error": f"Unknown model_choice: {model_choice!r}"}, 400

        data = pd.DataFrame.from_dict(json.loads(data))
        # Rows arrive ordered by their string index labels after
        # serialization; restore numeric order '0', '1', '2', ...
        data = data.loc[[str(position) for position in range(len(data.index))]]

        data['0'] = data['0'].apply(filter_thai)

        if 'Logistic Regression' in predictions or 'Naive Bayes' in predictions:
            # Both classical models consume the same features; build them once.
            features = sklearn_vectorizer.transform(data['0']).toarray()
            if 'Logistic Regression' in predictions:
                predictions['Logistic Regression'] = (
                    logistic_regression_model.predict_proba(features).tolist()
                )
            if 'Naive Bayes' in predictions:
                predictions['Naive Bayes'] = (
                    naive_bayes_model.predict_proba(features).tolist()
                )

        if 'LSTM' in predictions or 'Transformers' in predictions:
            # Only the Keras models need the tokenized text.
            data['0'] = data['0'].apply(tokenize_text)
            if 'LSTM' in predictions:
                predictions['LSTM'] = _predict_in_batches(
                    lstm_model, lstm_vectorizer, data)
            if 'Transformers' in predictions:
                predictions['Transformers'] = _predict_in_batches(
                    transformers_model, transformers_vectorizer, data)

        return {'predictions': predictions,
                'message': tf.config.list_physical_devices('GPU')
                }, 201
    except Exception as e:
        # Request boundary: any failure while parsing or predicting is
        # reported to the caller as a 400 with the exception text.
        return {"error": str(e)}, 400


# Wrap the Flask WSGI application in an Azure Functions WSGI function app.
# The Functions-level auth is ANONYMOUS; run_ai performs its own check of the
# 'aikey' request header against the AIKEY environment variable.
app = func.WsgiFunctionApp(app=flask_app.wsgi_app,
http_auth_level=func.AuthLevel.ANONYMOUS)
26 changes: 26 additions & 0 deletions host.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"version": "2.0",
"logging":
{
"applicationInsights":
{
"samplingSettings":
{
"isEnabled": true,
"excludedTypes": "Request"
}
}
},
"extensionBundle":
{
"id": "Microsoft.Azure.Functions.ExtensionBundle",
"version": "[2.*, 3.0.0)"
},
"extensions":
{
"http":
{
"routePrefix": ""
}
}
}
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pandas==2.0.3
numpy==1.24.4
pythainlp==4.0.2
scikit-learn==1.3.2
tensorflow==2.10.1
flask==2.2.5
Binary file not shown.
Binary file added sentiment_analysis/models/lstm.h5
Binary file not shown.
Binary file added sentiment_analysis/models/naive_bayes.pkl
Binary file not shown.
Binary file added sentiment_analysis/models/sklearn_vectorizer.pkl
Binary file not shown.
4 changes: 4 additions & 0 deletions sentiment_analysis/models/tf_vectorizer/keras_metadata.pb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

�root"_tf_keras_sequential*�{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}, {"class_name": "TextVectorization", "config": {"name": "text_vectorization", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "max_tokens": 1024, "standardize": null, "split": "whitespace", "ngrams": null, "output_mode": "int", "output_sequence_length": 64, "pad_to_max_tokens": false, "sparse": false, "ragged": false, "vocabulary": null, "idf_weights": null}}]}, "shared_object_id": 2, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 1]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "input_1"]}, "keras_version": "2.10.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "shared_object_id": 0}, {"class_name": "TextVectorization", 
"config": {"name": "text_vectorization", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "max_tokens": 1024, "standardize": null, "split": "whitespace", "ngrams": null, "output_mode": "int", "output_sequence_length": 64, "pad_to_max_tokens": false, "sparse": false, "ragged": false, "vocabulary": null, "idf_weights": null, "has_input_vocabulary": false}, "shared_object_id": 1}]}}}2
�root.layer_with_weights-0"_tf_keras_layer*�{"name": "text_vectorization", "trainable": true, "expects_training_arg": false, "dtype": "string", "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "stateful": false, "must_restore_from_config": true, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "TextVectorization", "config": {"name": "text_vectorization", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "max_tokens": 1024, "standardize": null, "split": "whitespace", "ngrams": null, "output_mode": "int", "output_sequence_length": 64, "pad_to_max_tokens": false, "sparse": false, "ragged": false, "vocabulary": null, "idf_weights": null, "has_input_vocabulary": false}, "shared_object_id": 1, "build_input_shape": {"class_name": "TensorShape", "items": [null, 1]}}2
� 'root.layer_with_weights-0._lookup_layer"_tf_keras_layer*�{"name": "string_lookup", "trainable": true, "expects_training_arg": false, "dtype": "int64", "batch_input_shape": null, "stateful": false, "must_restore_from_config": true, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "StringLookup", "config": {"name": "string_lookup", "trainable": true, "dtype": "int64", "invert": false, "max_tokens": 1024, "num_oov_indices": 1, "oov_token": "[UNK]", "mask_token": "", "output_mode": "int", "sparse": false, "pad_to_max_tokens": false, "vocabulary": null, "idf_weights": null, "encoding": "utf-8", "has_input_vocabulary": false}, "shared_object_id": 4, "build_input_shape": {"class_name": "TensorShape", "items": [null, null]}}2
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions sentiment_analysis/models/tf_vectorizer_t/keras_metadata.pb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

�root"_tf_keras_sequential*�{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}, {"class_name": "TextVectorization", "config": {"name": "text_vectorization", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "max_tokens": 1024, "standardize": null, "split": "whitespace", "ngrams": null, "output_mode": "int", "output_sequence_length": 128, "pad_to_max_tokens": false, "sparse": false, "ragged": false, "vocabulary": null, "idf_weights": null}}]}, "shared_object_id": 2, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 1]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 1]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "input_1"]}, "keras_version": "2.10.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 1]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "shared_object_id": 0}, {"class_name": "TextVectorization", 
"config": {"name": "text_vectorization", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "max_tokens": 1024, "standardize": null, "split": "whitespace", "ngrams": null, "output_mode": "int", "output_sequence_length": 128, "pad_to_max_tokens": false, "sparse": false, "ragged": false, "vocabulary": null, "idf_weights": null, "has_input_vocabulary": false}, "shared_object_id": 1}]}}}2
�root.layer_with_weights-0"_tf_keras_layer*�{"name": "text_vectorization", "trainable": true, "expects_training_arg": false, "dtype": "string", "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "stateful": false, "must_restore_from_config": true, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "TextVectorization", "config": {"name": "text_vectorization", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "max_tokens": 1024, "standardize": null, "split": "whitespace", "ngrams": null, "output_mode": "int", "output_sequence_length": 128, "pad_to_max_tokens": false, "sparse": false, "ragged": false, "vocabulary": null, "idf_weights": null, "has_input_vocabulary": false}, "shared_object_id": 1, "build_input_shape": {"class_name": "TensorShape", "items": [null, 1]}}2
� 'root.layer_with_weights-0._lookup_layer"_tf_keras_layer*�{"name": "string_lookup", "trainable": true, "expects_training_arg": false, "dtype": "int64", "batch_input_shape": null, "stateful": false, "must_restore_from_config": true, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "StringLookup", "config": {"name": "string_lookup", "trainable": true, "dtype": "int64", "invert": false, "max_tokens": 1024, "num_oov_indices": 1, "oov_token": "[UNK]", "mask_token": "", "output_mode": "int", "sparse": false, "pad_to_max_tokens": false, "vocabulary": null, "idf_weights": null, "encoding": "utf-8", "has_input_vocabulary": false}, "shared_object_id": 4, "build_input_shape": {"class_name": "TensorShape", "items": [null, null]}}2
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit d089631

Please sign in to comment.