# examples/python/mesapy_logistic_reg_payload.py

#!/usr/bin/env python3

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import _numpypy as np
import marshal


def read_file_train(file_id):
    """Load a comma-separated training set through ``teaclave_open``.

    Every non-blank line holds the feature values followed by the label in
    the last column.

    Args:
        file_id: Identifier handed to ``teaclave_open``.

    Returns:
        A ``(feature, label)`` pair of float64 arrays. ``feature`` has one
        row per sample line; ``label`` is reshaped to a single column.
    """
    feature_rows = []
    label_values = []
    with teaclave_open(file_id, "rb") as rdr:
        while True:
            line = rdr.readline()
            if not line:
                # readline() returns an empty value only at end of file.
                break
            line = line.strip()
            if not line:
                # A blank line (typically the trailing newline at the end of
                # the file) carries no sample. Parsing it would append an
                # empty feature row and an empty label, which cannot be
                # converted to float64 below.
                continue
            columns = line.split(',')
            feature_rows.append(columns[:-1])
            label_values.append(columns[-1])

    label = np.multiarray.array(label_values, dtype='float64').reshape(-1, 1)
    feature = np.multiarray.array(feature_rows, dtype='float64')
    return feature, label


def read_file_predict(file_id, params_id, scaler_id):
    """Load the samples to classify together with the saved model.

    Args:
        file_id: Identifier of the comma-separated feature file; every
            non-blank line is one sample.
        params_id: Identifier of the marshalled model parameters.
        scaler_id: Identifier of the marshalled scaler.

    Returns:
        ``(feature, params, scaler)`` where ``feature`` is a float64 array
        with one row per sample line and ``params`` / ``scaler`` are the
        objects restored by ``marshal.loads``.
    """
    with teaclave_open(params_id, "rb") as params_rdr:
        params_blob = params_rdr.read()
    with teaclave_open(scaler_id, "rb") as scaler_rdr:
        scaler_blob = scaler_rdr.read()

    # NOTE(review): marshal is not hardened against malformed or malicious
    # input; these files must only ever come from a trusted training run.
    params = marshal.loads(params_blob)
    scaler = marshal.loads(scaler_blob)

    feature_rows = []
    with teaclave_open(file_id, "rb") as data_rdr:
        while True:
            line = data_rdr.readline()
            if not line:
                # readline() returns an empty value only at end of file.
                break
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline); they would
                # otherwise become a row holding one empty string that
                # cannot be converted to float64.
                continue
            feature_rows.append(line.split(','))

    feature = np.multiarray.array(feature_rows, dtype='float64')
    return feature, params, scaler


def save_model(params, scaler, params_saved, scaler_saved):
    """Marshal the model parameters and the scaler into their output files.

    Args:
        params: Object serialised with ``marshal.dumps`` into ``params_saved``.
        scaler: Object serialised with ``marshal.dumps`` into ``scaler_saved``.
        params_saved: Identifier opened for writing the parameters.
        scaler_saved: Identifier opened for writing the scaler.
    """
    # Serialise both objects before touching any output file.
    outputs = (
        (params_saved, marshal.dumps(params)),
        (scaler_saved, marshal.dumps(scaler)),
    )
    for target_id, blob in outputs:
        with teaclave_open(target_id, "wb") as wtr:
            wtr.write(blob)


def minmaxscaler_train(input_array):
    """Min-max scale ``input_array`` and return the fitted extrema.

    Args:
        input_array: Numeric array; the extrema are taken along axis 0.

    Returns:
        ``(scaled, scaler)`` where ``scaled`` is
        ``(input_array - min) / (max - min)`` and ``scaler`` is the dict
        ``{"max": ..., "min": ...}`` holding the extrema along axis 0.
        Entries whose max equals their min are scaled to 0.
    """
    array_max = input_array.max(0)
    array_min = input_array.min(0)
    scaler = {"max": array_max, "min": array_min}

    span = array_max - array_min
    # A constant column has max == min, so the plain division would be 0/0
    # and fill that column with NaN. Adding the boolean mask raises exactly
    # the zero spans to 1 and leaves every other span untouched.
    span = span + (span == 0)

    scaled = (input_array - array_min) / span
    return scaled, scaler


def minmaxscaler_predict(input_array, scaler):
    """Apply a previously fitted min-max scaling to ``input_array``.

    Args:
        input_array: Numeric array of samples to scale.
        scaler: Mapping with ``"max"`` and ``"min"`` entries, each a buffer
            that ``frombuffer`` can decode (presumably the raw bytes that
            marshal stored for the arrays fitted by ``minmaxscaler_train``;
            confirm against the saved file format).

    Returns:
        ``(input_array - min) / (max - min)``; entries whose fitted max
        equals the fitted min are divided by 1 instead of 0.
    """
    array_max = np.multiarray.frombuffer(scaler["max"])
    array_min = np.multiarray.frombuffer(scaler["min"])

    span = array_max - array_min
    # A column that was constant during fitting has max == min; dividing by
    # that zero span would yield NaN/inf. Adding the boolean mask raises
    # exactly the zero spans to 1 and leaves every other span untouched.
    span = span + (span == 0)

    return (input_array - array_min) / span


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``."""
    return 1 / (1 + np.umath.exp(-z))


def initialize_with_zeros(dim):
    """Return the initial model: a ``(dim, 1)`` zero weight array and bias 0.

    Args:
        dim: Number of rows of the weight array.

    Returns:
        ``(w, b)`` with ``w`` a zero-filled ``(dim, 1)`` array and ``b`` the
        integer 0.
    """
    return np.multiarray.zeros((dim, 1)), 0


def propagate(w, b, X, Y):
    """Compute the cost and its gradients for the current parameters.

    Args:
        w: Weight array; ``w.T.dot(X)`` must be defined.
        b: Bias added to ``w.T.dot(X)``.
        X: Feature array; its second axis is used as the sample count.
        Y: Labels, combined element-wise with the activation.

    Returns:
        ``(grads, cost)`` where ``grads`` is ``{"dw": ..., "db": ...}`` and
        ``cost`` is the cross-entropy summed over all entries and divided by
        the sample count.
    """
    sample_count = X.shape[1]
    activation = sigmoid(w.T.dot(X) + b)

    log_likelihood = (Y * np.umath.log(activation)
                      + (1 - Y) * np.umath.log(1 - activation))
    cost = -(log_likelihood.sum()) / sample_count

    # Difference between the activation and the labels drives both gradients.
    error = activation - Y
    grads = {
        "dw": (X.dot(error.T)) / sample_count,
        "db": (error.sum()) / sample_count,
    }
    return grads, cost


def optimize(w, b, X, Y, num_iterations, learning_rate):
    """Run plain gradient descent for ``num_iterations`` steps.

    Args:
        w: Initial weights.
        b: Initial bias.
        X: Feature array forwarded to ``propagate``.
        Y: Label array forwarded to ``propagate``.
        num_iterations: Number of update steps.
        learning_rate: Factor applied to each gradient before subtracting.

    Returns:
        ``{"w": w, "b": b}`` with the parameters after the last step.
    """
    for _step in range(num_iterations):
        # The cost returned by propagate() is not used by the update rule.
        gradients = propagate(w, b, X, Y)[0]
        w = w - learning_rate * gradients["dw"]
        b = b - learning_rate * gradients["db"]
    return {"w": w, "b": b}


def logistic_model(feature, label, learning_rate=0.1, num_iterations=2000):
    """Fit logistic-regression parameters starting from all-zero weights.

    Args:
        feature: Feature array; its first axis sets the number of weights.
        label: Label array forwarded to ``optimize``.
        learning_rate: Gradient-descent step size.
        num_iterations: Number of gradient-descent steps.

    Returns:
        The ``{"w": ..., "b": ...}`` dict produced by ``optimize``.
    """
    weights, bias = initialize_with_zeros(feature.shape[0])
    return optimize(weights, bias, feature, label, num_iterations,
                    learning_rate)


def logistic_predict(feature, params):
    """Classify every sample column of ``feature`` as 0 or 1.

    Args:
        feature: Feature array; its second axis is the number of samples.
        params: Mapping with ``'w'`` and ``'b'`` entries, each a buffer
            decoded with ``frombuffer`` (presumably the marshalled training
            output; confirm against the saved file format).

    Returns:
        A ``(1, m)`` array, ``m`` being the number of samples, holding 1
        where the sigmoid activation exceeds 0.5 and 0 elsewhere.
    """
    weights = np.multiarray.frombuffer(params['w'])
    bias = np.multiarray.frombuffer(params['b'])
    sample_count = feature.shape[1]
    prediction = np.multiarray.zeros((1, sample_count))

    probability = sigmoid(weights.T.dot(feature) + bias)
    for idx in range(sample_count):
        prediction[0, idx] = 1 if probability[idx] > 0.5 else 0

    return prediction


def train(train_file, params_saved, scaler_saved):
    """Train the model on ``train_file`` and store parameters and scaler.

    Args:
        train_file: Identifier of the training data.
        params_saved: Identifier the fitted parameters are written to.
        scaler_saved: Identifier the fitted scaler is written to.
    """
    raw_feature, raw_label = read_file_train(train_file)
    scaled_feature, scaler = minmaxscaler_train(raw_feature)
    # The model code works on transposed arrays (samples along axis 1).
    params = logistic_model(scaled_feature.T,
                            raw_label.T,
                            learning_rate=0.05,
                            num_iterations=2000)
    save_model(params, scaler, params_saved, scaler_saved)


def predict(file_id, params_id, scaler_id):
    """Classify the samples in ``file_id`` with a previously saved model.

    Args:
        file_id: Identifier of the feature file to classify.
        params_id: Identifier of the saved model parameters.
        scaler_id: Identifier of the saved scaler.

    Returns:
        The prediction array produced by ``logistic_predict``.
    """
    raw_feature, params, scaler = read_file_predict(file_id, params_id,
                                                    scaler_id)
    scaled_feature = minmaxscaler_predict(raw_feature, scaler)
    # The model code works on transposed arrays (samples along axis 1).
    return logistic_predict(scaled_feature.T, params)


def entrypoint(argv):
    """Dispatch one invocation to training or prediction.

    ``argv`` is a flat list of four ``name, value`` pairs. The names used
    are ``operation`` (``"train"`` or ``"predict"``), ``params_saved``,
    ``scaler_saved`` and either ``train_file`` or ``predict_file``. Other
    names are ignored; if a name is repeated the last value wins.

    Args:
        argv: List of exactly eight items, alternating names and values.

    Returns:
        ``"Training is finished!"`` after training, the string form of the
        prediction array after prediction, or ``"NOT supported argv"`` when
        the operation is missing or unknown or when an argument needed by
        the requested operation is absent.

    Raises:
        AssertionError: If ``argv`` does not hold exactly eight items.
    """
    assert len(argv) == 8

    options = {}
    for i in range(0, 4):
        options[argv[2 * i]] = argv[2 * i + 1]

    reg_type = options.get("operation")
    if reg_type == "train":
        input_key = "train_file"
    elif reg_type == "predict":
        input_key = "predict_file"
    else:
        return "NOT supported argv"

    # Report a missing argument the same way as an unknown operation rather
    # than failing later on an unbound local variable.
    for required in (input_key, "params_saved", "scaler_saved"):
        if required not in options:
            return "NOT supported argv"

    params_saved = options["params_saved"]
    scaler_saved = options["scaler_saved"]
    if reg_type == "train":
        train(options[input_key], params_saved, scaler_saved)
        return "Training is finished!"

    prediction = predict(options[input_key], params_saved, scaler_saved)
    return str(prediction)