run

#!/usr/bin/env python3

# This simulator is provided by CS3700 Northeastern University

import sys, os, time, json, socket, select, random, subprocess, signal
import string, hashlib, bisect, atexit, getpass, argparse, functools

# Version identifier of this simulator script
VERSION = "2"

# Name presumably used to select every test at once; it is not referenced in
# the part of the file reviewed here -- TODO confirm against the CLI parsing
ALL_TESTS = 'all'
# Executable launched once per replica (see Replica.run)
REPLICA_PROG = './3700kvstore'
# Number of simulated clients created by each Simulation
NUM_CLIENTS = 8

# Timeout (seconds) of the select() call that waits for replica traffic/output
RECV_WAIT = 0.1
# Timeout (seconds) of the select() call that waits for a replica socket to be writable
SEND_WAIT = 0.01
# Pause (seconds) after each test executed through run_test()
INTER_TEST_WAIT = 10

# Defaults for the optional thresholds in the "tests" section of a config file.
# Each is a fraction in [0, 1]; see Config.__init__ for the matching config keys.
MAX_GET_FRAC = 0.5
MAX_PUT_FRAC = 0.5
MAX_GET_FAIL_GEN_FRAC = 0.1
APPENDS_BATCHED_FRAC = 0.5

# Names of the correctness checks; these are the strings that
# Simulation.correctness_check() looks for in its `ignore` argument
CORRECTNESS_TESTS = (
    "incorrect",
    "died",
    "no_state",
    "get_answered",
    "put_answered",
    "get_generated",
    "total_msgs"
)
# Names of the performance benchmarks a config file may define tiers for
PERF_TESTS = ("total_msgs", "failures", "duplicates", "median_latency")

# Not referenced in the part of the file reviewed here; presumably a leftover
# message delimiter -- TODO confirm before removing
TERMINATOR = b'}\n'
# Sink for replica stdout/stderr when output is silenced (kept open for the
# lifetime of the process)
DEVNULL = open(os.devnull, 'w')

#######################################################################################################
# Utilities for coloring the terminal output, since import termcolor and colorama aren't available
# on the Khoury machines >:(

class bcolors:
    """ANSI escape sequences used to color and style terminal output."""

    # Reset: switches off every attribute enabled by the codes below
    ENDC = '\033[0m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Bright foreground colors
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'

def bold(s):
    """Print the string *s* in bold, underlined text."""
    pieces = (bcolors.BOLD, bcolors.UNDERLINE, s, bcolors.ENDC)
    print(''.join(pieces))

def fail(s):
    """Print the string *s* using the failure color."""
    pieces = (bcolors.FAIL, s, bcolors.ENDC)
    print(''.join(pieces))

def ok(s):
    """Print the string *s* using the success (green) color."""
    pieces = (bcolors.OKGREEN, s, bcolors.ENDC)
    print(''.join(pieces))

def win(s):
    """Print the string *s* using the bonus (blue) color."""
    pieces = (bcolors.OKBLUE, s, bcolors.ENDC)
    print(''.join(pieces))

def warn(s):
    """Print the string *s* using the warning color."""
    pieces = (bcolors.WARNING, s, bcolors.ENDC)
    print(''.join(pieces))

#######################################################################################################

# Parses and validates config files for simulations
class Config:
    """Validated settings for one simulation run.

    A Config is built from a dict, a string of JSON, or the path of a JSON
    file. Required keys are 'lifetime', 'replicas', 'requests' and
    'tests' (which must contain 'benchmarks'). Every other key falls back to
    a default. Note that constructing a Config seeds the global `random`
    module with the configured seed (or None).
    """

    # Event types accepted in the optional 'events' list
    EVENT_TYPES = ('kill_leader', 'kill_non_leader', 'part_easy', 'part_hard', 'part_end')

    def __init__(self, config):
        """Load and validate *config*.

        Raises TypeError for an unsupported argument type, AttributeError when
        a required top-level field is missing, and ValueError for any value
        that is out of range or malformed.
        """
        # load the json
        if isinstance(config, dict):
            conf = config
        elif isinstance(config, str):
            if os.path.isfile(config):
                # context manager so the file handle is closed promptly
                with open(config) as conf_file:
                    conf = json.load(conf_file)
            else:
                conf = json.loads(config)
        else:
            raise TypeError("The type of the config parameter must be a file path, string-encoded JSON, or JSON")

        # check for required fields
        if 'lifetime' not in conf or 'replicas' not in conf or 'requests' not in conf:
            raise AttributeError("Required field is missing from the config file")

        # load the required fields and sanity check them
        self.lifetime = int(conf['lifetime'])
        if self.lifetime < 5:
            raise ValueError("Simulation lifetime must be at least 5 seconds")
        self.replicas = int(conf['replicas'])
        if self.replicas < 3 or self.replicas > 21:
            raise ValueError("Number of replicas must be at least 3 and at most 21")
        self.requests = int(conf['requests'])
        if self.requests < 0:
            raise ValueError("Number of requests cannot be negative")

        # initialize the random number generator
        self.seed = conf.get('seed')
        random.seed(self.seed)

        # load the default variables
        self.mix = self.__get_default__(conf, 'mix', 0, 1, 0.8, "Read/Write mix must be between 0 and 1")
        self.start_wait = self.__get_default__(conf, 'start_wait', 0, self.lifetime, 5.0,
            f"Start wait must be between 0 and {self.lifetime}")
        self.end_wait = self.__get_default__(conf, 'end_wait', 0, self.lifetime, 2.0,
            f"End wait must be between 0 and {self.lifetime}")
        self.drops = self.__get_default__(conf, 'drops', 0, 1, 0.0, "Drops must be between 0 and 1")
        self.max_packets = self.__get_default__(conf, 'max_packets', self.requests, 900000,
                                                20000, f"max_packets must be between {self.requests} and 900000")

        self.events = conf['events'] if 'events' in conf else []

        # sanity check the events
        for event in self.events:
            if event['type'] not in self.EVENT_TYPES:
                raise ValueError(f"Unknown event type: {event['type']}")
            if event['time'] < 0 or event['time'] > self.lifetime:
                raise ValueError(f"Event time must be between 0 and {self.lifetime}")

        # Load the correctness and performance benchmarks
        if 'tests' not in conf:
            raise ValueError("No data specified about testing or performance benchmarking")
        tests = conf['tests']
        if 'benchmarks' not in tests:
            raise ValueError("No performance benchmarks specified")

        self.max_get_frac = self.__get_default__(tests, 'maximum_get_fail_fraction', 0.0, 1.0, MAX_GET_FRAC,
                                                 f'Maximum fraction of gets that may fail must be between 0 and 1 (default: {MAX_GET_FRAC})')
        self.max_put_frac = self.__get_default__(tests, 'maximum_put_fail_fraction', 0.0, 1.0, MAX_PUT_FRAC,
                                                 f'Maximum fraction of puts that may fail must be between 0 and 1 (default: {MAX_PUT_FRAC})')
        self.max_get_fail_frac = self.__get_default__(tests, 'maximum_get_generation_fail_fraction', 0.0, 1.0,
                                                      MAX_GET_FAIL_GEN_FRAC,
                                                      f'Maximum fraction of gets that fail to be generated must be between 0 and 1 (default: {MAX_GET_FAIL_GEN_FRAC})')
        self.app_batch_frac = self.__get_default__(tests, 'maximum_appends_batched_fraction', 0.0, 1.0, APPENDS_BATCHED_FRAC,
                                                   f'Fraction of appends that may be batched must be between 0 and 1 (default: {APPENDS_BATCHED_FRAC})')

        self.benchmarks = {}
        for test, tiers in tests['benchmarks'].items():
            if len(tiers) != 3:
                raise ValueError(f"Incorrect number of performance tiers in test {test}. len(tiers) = {len(tiers)}")
            # exact type match on purpose: bool is a subclass of int but is not a valid tier
            if not all(type(t) in (float, int) for t in tiers):
                raise ValueError(f"Type issue in test {test}. Given tiers: {str(tiers)}")
            if test not in PERF_TESTS:
                raise ValueError(f"Unknown test type: {test}")

            self.benchmarks[test] = tiers

        # From here on `lifetime` is the total wall-clock duration, including
        # the quiet periods before the first and after the last request
        self.lifetime += self.start_wait + self.end_wait

    def __get_default__(self, conf, field, low, high, default, exception):
        """Return conf[field] as a float, or *default* when the field is absent.

        Raises ValueError(exception) when the value lies outside [low, high].
        The default itself is returned unchecked and unconverted.
        """
        if field not in conf:
            return default
        value = float(conf[field])
        if value < low or value > high:
            raise ValueError(exception)
        return value

    def dump(self):
        """Print every setting, the scheduled events and the benchmark tiers."""
        settings = (
            ('Lifetime', self.lifetime), ('Replicas', self.replicas),
            ('Requests', self.requests), ('Seed', self.seed),
            ('Mix', self.mix), ('Start Wait', self.start_wait),
            ('End Wait', self.end_wait), ('Drops', self.drops),
            ('Max Packets', self.max_packets),
            ('Maximum Get Fail Fraction', self.max_get_frac),
            ('Maximum Put Fail Fraction', self.max_put_frac),
            ('Maximum Get Generation Failure Fraction', self.max_get_fail_frac),
            ('Append Batching Fraction', self.app_batch_frac),
        )
        print(''.join(f'{label:8} {value}\n' for label, value in settings))

        for event in self.events:
            print(f"   Event {event['type']:15} {event['time']}")

        # .items() is required: iterating the dict directly yields only keys
        for test, tiers in self.benchmarks.items():
            print(f'{test:16} {str(tiers)}')

#######################################################################################################

class Stats:
    """Counters and latency samples collected while a simulation runs.

    The counters are updated directly by the simulation and its clients;
    call finalize() once at the end to derive `failures` and the latency
    summary values.
    """

    def __init__(self):
        self.total_msgs = 0
        self.total_drops = 0
        # Base gets and puts that the simulation will send. total_* will be higher, due to redirects and failures
        self.generated_get = 0
        self.generated_put = 0
        # Could not generate a get because nothing had been put() yet
        self.failed_to_generate_get = 0
        # Actual get() and put() requests sent during the simulation
        self.total_get = 0
        self.total_put = 0
        # get() and put() requests that elicited a failure response
        self.failed_get = 0
        self.failed_put = 0
        # Outstanding get() and put() requests at the end of the sim
        self.unanswered_get = 0
        self.unanswered_put = 0
        self.incorrect = 0
        self.duplicates = 0
        self.redirects = 0
        self.latencies = []
        self.died = 0
        self.killed = 0
        self.blocked = 0
        self.mean_latency = 0.0
        self.median_latency = 0.0
        self.leaders = []
        # Failed plus unanswered requests; recomputed by finalize(). Defined
        # here so that reading it before finalize() does not raise.
        self.failures = 0

    def add_leader(self, ldr):
        """Append *ldr* to the leader history unless it repeats the latest entry."""
        if not self.leaders or self.leaders[-1] != ldr:
            self.leaders.append(ldr)

    def finalize(self):
        """Compute `failures` and, if any samples exist, mean/median latency.

        Sorts `latencies` in place. The median is the upper-middle sample
        when the number of samples is even.
        """
        self.failures = self.failed_get + self.failed_put + self.unanswered_get + self.unanswered_put

        if self.latencies:
            self.latencies.sort()
            self.mean_latency = float(sum(self.latencies)) / len(self.latencies)
            self.median_latency = self.latencies[len(self.latencies) // 2]

    def dump(self):
        """Print a human-readable summary of all statistics."""
        print('Leaders:', ' '.join(self.leaders))
        print(f'Replicas that died/were killed: {self.died}/{self.killed}')
        print('Total messages sent:', self.total_msgs)
        print('Total messages dropped:', self.total_drops)
        print('Total messages blocked by full sockets:', self.blocked)
        print(f'Total client get()/put() requests: {self.total_get}/{self.total_put}')
        print('Total duplicate responses:', self.duplicates)
        print(f'Total unanswered get()/put() requests: {self.unanswered_get}/{self.unanswered_put}')
        print('Total redirects:', self.redirects)
        print(f'Total get()/put() failures: {self.failed_get}/{self.failed_put}')
        print('Total get() with incorrect response:', self.incorrect)
        if self.latencies:
            # Take the median from a sorted copy so the figure is right even
            # when dump() is called before finalize() has sorted the samples
            ordered = sorted(self.latencies)
            mean = float(sum(self.latencies)) / len(self.latencies)
            median = ordered[len(ordered) // 2]
            print('Mean/Median query latency: {}sec/{}sec'.format(mean, median))


#######################################################################################################

class Client:
    """A simulated client that issues get()/put() requests and checks the replies.

    The client remembers every value it has successfully stored (`items`) so
    that later get() responses can be verified, and tracks outstanding
    requests (`reqs`) by message id. Statistics are recorded on the owning
    simulation's `stats` object.
    """

    class Request:
        """An outstanding request: its kind, key, value and send timestamp."""

        def __init__(self, get, key, val=None):
            self.get = get          # True for get(), False for put()
            self.key = key
            self.val = val          # value being stored; None for get()
            self.ts = time.time()   # creation time, used to measure latency

    def __init__(self, simulator, cid):
        self.reqs = {}          # MID -> Request, still awaiting an answer
        self.items = {}         # key -> value of every acknowledged put()
        self.sim = simulator
        self.cid = cid
        self.leader = 'FFFF'    # 'FFFF' means "leader unknown"

    def forget(self):
        """Discard the cached leader so the next request picks a random replica."""
        self.leader = 'FFFF'

    def __get_rand_str__(self, size=16, chars=string.ascii_uppercase + string.digits):
        """Return a random string of *size* characters drawn from *chars*."""
        return ''.join(random.choice(chars) for _ in range(size))

    def __get_destination__(self):
        """Pick the replica to contact, or None when no replica is alive.

        Uses the cached leader when it is known and alive; otherwise resets
        the cached leader and returns a random living replica.
        """
        living = self.sim.living_rids
        if len(living) == 0:
            return None
        if self.leader == 'FFFF' or self.leader not in living:
            self.leader = 'FFFF'
            return random.choice(list(living))
        return self.leader

    def __create_get__(self, key):
        """Register and build a get() request message for *key*."""
        self.sim.stats.total_get += 1
        mid = self.__get_rand_str__()
        self.reqs[mid] = self.Request(True, key)
        # must run before the message is built: it may reset self.leader
        dst = self.__get_destination__()
        return {'src': self.cid, 'dst': dst, 'leader': self.leader,
                'type': 'get', 'MID': mid, 'key': key}

    def __create_put__(self, key, value):
        """Register and build a put() request message storing *value* under *key*."""
        self.sim.stats.total_put += 1
        mid = self.__get_rand_str__()
        self.reqs[mid] = self.Request(False, key, value)
        # must run before the message is built: it may reset self.leader
        dst = self.__get_destination__()
        return {'src': self.cid, 'dst': dst, 'leader': self.leader,
                'type': 'put', 'MID': mid, 'key': key, 'value': value}

    def finalize(self):
        """Count every still-outstanding request as unanswered."""
        for req in self.reqs.values():
            if req.get:
                self.sim.stats.unanswered_get += 1
            else:
                self.sim.stats.unanswered_put += 1

    def create_req(self, get=True):
        """Build a new request message.

        When *get* is true and at least one key has been stored, a get() for
        a random known key is produced. Otherwise (including a get() that
        cannot be generated yet) a put() is produced, either for a brand new
        key or as an update of an existing one.
        """
        # create a get message, if possible
        if get:
            if self.items:
                return self.__create_get__(random.choice(list(self.items)))
            self.sim.stats.failed_to_generate_get += 1

        # decide to add a new key, or update an existing key
        if not self.items or random.random() > 0.5:
            k = self.__get_rand_str__(size=32)
            v = hashlib.md5(k.encode()).hexdigest()
        else:
            k = random.choice(list(self.items))
            v = hashlib.md5(self.items[k].encode()).hexdigest()
        return self.__create_put__(k, v)

    def deliver(self, raw_msg, msg):
        """Process a response addressed to this client.

        raw_msg -- the message text as received (used only for diagnostics)
        msg     -- the decoded message; the caller has already checked that it
                   is a dict containing 'src', 'dst', 'leader' and 'type'

        Returns a new request message when the request must be retried
        (after a 'redirect' or 'fail'), otherwise None.
        """
        stats = self.sim.stats

        # validate the message
        if 'MID' not in msg:
            warn(f"*** Simulator Error - Message missing mid field: {raw_msg}")
            stats.incorrect += 1
            return None
        if msg['type'] not in ['ok', 'fail', 'redirect']:
            warn(f"*** Simulator Error - Unknown message type sent to client: {raw_msg}")
            stats.incorrect += 1
            return None

        mid = msg['MID']
        # MIDs come from the replicas and are used as set/dict keys below; an
        # unhashable value (list, dict) would otherwise crash the simulator
        if not isinstance(mid, str):
            warn(f"*** Simulator Error - client received an unexpected MID: {raw_msg}")
            stats.incorrect += 1
            return None

        # is this a duplicate?
        if mid in self.sim.completed:
            stats.duplicates += 1
            return None

        # is this a message that I'm expecting?
        req = self.reqs.pop(mid, None)
        if req is None:
            warn(f"*** Simulator Error - client received an unexpected MID: {raw_msg}")
            stats.incorrect += 1
            return None

        self.leader = msg['leader']
        stats.latencies.append(time.time() - req.ts)

        # if this is a redirect or a fail, try again
        if msg['type'] in ('redirect', 'fail'):
            failed = msg['type'] == 'fail'
            stats.redirects += 1
            if req.get:
                if failed:
                    stats.failed_get += 1
                return self.__create_get__(req.key)
            if failed:
                stats.failed_put += 1
            return self.__create_put__(req.key, req.val)

        # msg type must be ok, mark it as completed
        self.sim.completed.add(mid)
        if req.get:
            if 'value' not in msg:
                warn(f"*** Simulator Error - get() response missing the value of the key: {raw_msg}")
                stats.incorrect += 1
            elif self.items[req.key] != msg['value']:
                warn(f"*** Simulator Error - client received an incorrect value for a key: {raw_msg}")
                stats.incorrect += 1
        else:
            # the put() was acknowledged: remember the value for later checks
            self.items[req.key] = req.val

        return None

#######################################################################################################

# Represents a replica, the associated process, and its sockets
class Replica:
    """One replica: its id, its child process and the UDP socket used to reach it.

    deliver() reports the outcome of a send with one of the DELIVERY_*
    status codes below.
    """

    DELIVERY_OK = 0
    DELIVERY_DEAD = 1
    DELIVERY_BLOCKED = 2

    def __init__(self, rid):
        """Create the replica record and bind a UDP socket on an OS-chosen port."""
        self.rid = rid
        self.alive = False
        self.fds = []       # stdout/stderr pipes of the process (unsilenced runs only)
        self.proc = None    # set by run()

        # try and delete the old domain socket, just in case
        try:
            os.unlink(rid)
        except OSError:
            pass

        # create the socket for this replica
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.socket.bind(('localhost', 0))

        self.port = self.socket.getsockname()[1]
        # port the replica process sends from; learned from its "hello" message
        self.remote_port = None

    @staticmethod
    def _make_non_blocking(fd):
        """Switch *fd* to non-blocking mode; warn if fcntl is unavailable."""
        try:
            from fcntl import fcntl, F_GETFL, F_SETFL
            flags = fcntl(fd, F_GETFL)
            fcntl(fd, F_SETFL, flags | os.O_NONBLOCK)
        except ImportError:
            print("Warning:  Unable to load fcntl module; things may not work as expected.")

    def run(self, rids, silence):
        """Launch the replica process.

        rids    -- set of all replica ids; every id except this replica's own
                   is passed to the process as a peer
        silence -- when true the process output is discarded, otherwise it is
                   captured through non-blocking pipes exposed in `self.fds`

        On failure an error is printed and the replica is left not alive.
        """
        args = [REPLICA_PROG, str(self.port), self.rid]
        args.extend(rids - {self.rid})

        # Launch each process in its own process group so that it can be killed without affecting
        # the main process
        try:
            if silence:
                self.proc = subprocess.Popen(args, stdout=DEVNULL, stderr=DEVNULL, preexec_fn=os.setsid)
            else:
                self.proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid)
                self._make_non_blocking(self.proc.stdout)
                self._make_non_blocking(self.proc.stderr)
                self.fds = [self.proc.stdout, self.proc.stderr]

            self.alive = True
        except Exception:
            # Deliberately broad (any launch problem marks the replica dead),
            # but no longer swallows KeyboardInterrupt/SystemExit
            fail('*** Simulator Error - Unable to execute ./3700kvstore')
            self.alive = False

    def shutdown(self):
        """Close the socket and, if the process is running, terminate it.

        Safe to call more than once and on a replica that never started.
        """
        was_alive = self.alive
        self.alive = False

        # Close the socket even when the process never started, so the
        # descriptor is not leaked
        if self.socket is not None:
            self.socket.close()
            self.socket = None

        if not was_alive:
            return

        # Kill the process group assigned to the raft replica
        try:
            os.killpg(os.getpgid(self.proc.pid), signal.SIGTERM)
        except ProcessLookupError:
            # the process (group) is already gone; nothing left to signal
            pass
        self.proc.wait()
        try:
            os.unlink(self.rid)
        except OSError:
            pass

    def deliver(self, raw_msg):
        """Send *raw_msg* (a str) to the replica process.

        Returns DELIVERY_OK when sent, DELIVERY_BLOCKED when the socket did
        not become writable within SEND_WAIT seconds, and DELIVERY_DEAD when
        the replica is not alive, has not yet announced its port, or the send
        failed (in which case the replica is shut down).
        """
        if self.alive and self.remote_port:
            try:
                ready = select.select([], [self.socket], [], SEND_WAIT)[1]
                if self.socket in ready:
                    self.socket.sendto(raw_msg.encode(), ('localhost', self.remote_port))
                    return Replica.DELIVERY_OK
                return Replica.DELIVERY_BLOCKED
            except (OSError, ValueError):
                warn('*** Simulator Error - Unable to send to replica')
                self.shutdown()
        return Replica.DELIVERY_DEAD
                                
#######################################################################################################

# Represents and executes the entire simulation
class Simulation:
    """Runs one complete simulation of the replicated key-value store.

    The simulation launches the replica processes, relays every message they
    emit (to other replicas, as broadcasts, or to simulated clients), injects
    client requests and failure events on a timeline, and collects the
    statistics graded by correctness_check() and performance_tests().
    """

    def __init__(self, config, silence):
        """Create the clients, the replicas and the event dispatch table.

        config  -- anything accepted by Config (dict, JSON string or file path)
        silence -- passed to every Replica.run(); controls whether replica
                   output is shown
        """
        self.leader = 'FFFF'    # 'FFFF' means "leader unknown"
        self.events = []        # SimEvents, kept sorted by timestamp
        self.silence = silence

        # stats tracking
        self.stats = Stats()
        self.completed = set()  # MIDs of requests that received an 'ok'

        # virtual network partitions
        self.partition = None

        # Load the config file
        self.conf = Config(config)
        #self.conf.dump()

        # Create the clients; their ids start 16 above the last replica id
        self.cids = set()
        self.clients = {}
        first_cid = self.conf.replicas + 16
        for i in range(first_cid, first_cid + NUM_CLIENTS):
            cid = f'{i:04x}'.upper()
            self.cids.add(cid)
            self.clients[cid] = Client(self, cid)

        # Create the sockets and the replicas
        self.rids = set()
        self.replicas = {}
        for i in range(self.conf.replicas):
            rid = f'{i:04x}'.upper()
            self.rids.add(rid)
            self.replicas[rid] = Replica(rid)

        self.living_rids = self.rids.copy()
        self.recv_buffers = {}

        # maps the event type names of the config file to their handlers
        self.type_to_func = {
            'kill_leader': self.__kill_leader__,
            'kill_non_leader': self.__kill_non_leader__,
            'part_easy': self.__partition_easy__,
            'part_hard': self.__partition_hard__,
            'part_end': self.__partition_end__
        }

    @functools.total_ordering
    class SimEvent:
        """A callback scheduled for a point in time; ordered by timestamp only."""

        def __init__(self, timestamp, function):
            self.timestamp = timestamp
            self.function = function

        def __lt__(self, other):
            if isinstance(other, Simulation.SimEvent):
                return self.timestamp < other.timestamp
            return False

        def __eq__(self, other):
            if isinstance(other, Simulation.SimEvent):
                return self.timestamp == other.timestamp
            return False

    def run(self):
        """Start the replicas and run the main event loop to completion.

        The loop ends when the configured lifetime has elapsed, the packet
        budget is exhausted, or no replica is left alive. Afterwards the
        statistics are finalized.
        """
        for r in self.replicas.values():
            r.run(self.rids, self.silence)

        # initialize the clock and create all the get(), put(), and kill() events
        clock = start = time.time()
        self.__populate_event_queue__(clock)

        # the main event loop
        while (clock - start < self.conf.lifetime
               and self.stats.total_msgs < self.conf.max_packets
               and len(self.living_rids) > 0):
            # populate the list of living sockets
            sockets = []
            for r in self.replicas.values():
                if r.socket:
                    sockets.append(r.socket)
                sockets.extend(r.fds)

            ready = select.select(sockets, [], [], RECV_WAIT)[0]

            for sock in ready:
                for r in self.replicas.values():
                    # If it is a network socket, route the packets
                    if r.socket == sock:
                        self.__route_msgs__(sock)

                    # Otherwise it's an output pipe of a replica process
                    if sock in r.fds:
                        self.__print_output(sock, start)

            # check the time and fire off events
            clock = time.time()
            while len(self.events) != 0 and self.events[0].timestamp < clock:
                self.events.pop(0).function()

        if self.stats.total_msgs >= self.conf.max_packets:
            warn(f"*** Simulator Error - Replicas sent too many packets (>{self.conf.max_packets}), possible packet storm")

        # every replica that is no longer living but was not killed on purpose has died
        self.stats.died = len(self.rids) - len(self.living_rids) - self.stats.killed

        # Finish out the clients. All unanswered requests are considered failures
        for client in self.clients.values():
            client.finalize()

        # Finish calculating the statistics
        self.stats.finalize()

    def shutdown(self):
        """Shut down every replica, ignoring errors (best-effort cleanup)."""
        for r in self.replicas.values():
            try:
                r.shutdown()
            except Exception:
                pass

    def __kill_replica__(self, r):
        """Deliberately kill replica *r* if it is still alive."""
        if r.rid in self.living_rids:
            self.stats.killed += 1
            self.living_rids.remove(r.rid)
            r.shutdown()

    def __kill_leader__(self):
        """Kill the replica currently believed to be the leader, if one is known."""
        if self.leader == 'FFFF':
            return
        # The leader id is whatever the replicas last reported; it may not
        # name an actual replica, so look it up defensively
        target = self.replicas.get(self.leader)
        if target is not None:
            self.__kill_replica__(target)
        self.leader = 'FFFF'
        for client in self.clients.values():
            client.forget()

    def __kill_non_leader__(self):
        """Kill a random living replica other than the believed leader."""
        if len(self.living_rids) > 1:
            candidates = list(self.living_rids - {self.leader})
            self.__kill_replica__(self.replicas[random.choice(candidates)])
        else:
            warn(f'*** Simulator Error - too few living replicas to kill another ({len(self.living_rids)})')

    def __partition__(self, add_leader=False):
        """Split the replicas into a random majority partition and the rest.

        With *add_leader* true and a known leader, the leader is placed in the
        majority. Otherwise the leader is forgotten by the simulation and by
        every client.
        """
        qsize = len(self.replicas) // 2 + 1
        self.partition = set()
        candidates = list(self.rids)

        # 'FFFF' and any bogus id reported by a replica are not in candidates,
        # so both are treated as "no known leader"
        if add_leader and self.leader in candidates:
            self.partition.add(self.leader)
            candidates.remove(self.leader)
            qsize -= 1
        else:
            self.leader = 'FFFF'
            for client in self.clients.values():
                client.forget()

        for _ in range(qsize):
            rid = random.choice(candidates)
            self.partition.add(rid)
            candidates.remove(rid)

    def __partition_easy__(self):
        """Partition the network keeping the leader in the majority."""
        self.__partition__(True)

    def __partition_hard__(self):
        """Partition the network without regard for the leader."""
        self.__partition__()

    def __partition_end__(self):
        """Heal the network partition."""
        self.partition = None

    def __check_partition__(self, rid1, rid2):
        """Return True when *rid1* and *rid2* may currently talk to each other."""
        if not self.partition:
            return True
        # both inside (2) or both outside (0) the partition set
        return len(self.partition & {rid1, rid2}) != 1

    def __send_get__(self):
        """Have a random client issue a get() (or a put() if no get is possible)."""
        client = random.choice(list(self.clients.values()))
        msg = client.create_req(True)
        if msg['dst']:
            self.__replica_deliver__(self.replicas[msg['dst']], json.dumps(msg))

    def __send_put__(self):
        """Have a random client issue a put()."""
        client = random.choice(list(self.clients.values()))
        msg = client.create_req(False)
        if msg['dst']:
            self.__replica_deliver__(self.replicas[msg['dst']], json.dumps(msg))

    def __replica_deliver__(self, replica, raw_msg):
        """Deliver *raw_msg* to *replica* and account for the outcome."""
        status = replica.deliver(raw_msg)
        if status == Replica.DELIVERY_DEAD:
            self.living_rids.discard(replica.rid)
        elif status == Replica.DELIVERY_BLOCKED:
            warn(f'*** Simulator Error - the socket to replica {replica.rid} is blocked')
            self.stats.blocked += 1
        # else status == Replica.DELIVERY_OK

    def __populate_event_queue__(self, clock):
        """Schedule all client requests and configured events.

        clock -- the simulation start time; requests begin `start_wait`
                 seconds later and are spread evenly until `end_wait`
                 seconds before the end.
        """
        clock += self.conf.start_wait

        # Generate get() and put() events for the event queue
        t = clock
        active = float(self.conf.lifetime - self.conf.start_wait - self.conf.end_wait)
        # a config may legally ask for zero requests; avoid dividing by zero
        delta = active / self.conf.requests if self.conf.requests > 0 else 0.0
        for _ in range(self.conf.requests):
            if random.random() < self.conf.mix:
                self.stats.generated_get += 1
                self.events.append(Simulation.SimEvent(t, self.__send_get__))
            else:
                self.stats.generated_put += 1
                self.events.append(Simulation.SimEvent(t, self.__send_put__))
            t += delta

        # Add any events from the config into the event queue
        for event in self.conf.events:
            bisect.insort(self.events, Simulation.SimEvent(event['time'] + clock, self.type_to_func[event['type']]))

    def __validate_addr__(self, addr):
        """Return True when *addr* is a 4-character string that parses as hexadecimal."""
        if type(addr) != str or len(addr) != 4:
            return False
        try:
            int(addr, 16)
        except ValueError:
            return False
        return True

    def __print_output(self, sock, start):
        """Print pending output from the replica pipe *sock*, prefixed with time and id."""
        for r in self.replicas.values():
            if sock in r.fds:
                data = sock.read(1500)
                if data is None:
                    # the non-blocking pipe had nothing to read after all
                    continue
                # replica output is arbitrary bytes: never let it crash the simulator
                msg = data.decode('utf-8', errors='replace')
                if len(msg) > 0:
                    for line in msg.strip().split("\n"):
                        print("[%02.4f  Replica %s]: %s" % (time.time() - start, r.rid, line))
                else:
                    # an empty read means end-of-file on the pipe
                    print("Replica %s crashed; closing sockets" % r.rid)
                    r.fds = []

    def __route_msgs__(self, sock):
        """Receive one datagram from a replica socket, validate it and route it."""
        try:
            buf, addr = sock.recvfrom(65535)
        except OSError:
            warn("*** Simulator Error - A replica quit unexpectedly")
            self.__close_replica__(sock)
            return

        # is this sock shutting down?
        if len(buf) == 0:
            warn("*** Simulator Error - Replica shut down a socket unexpectedly")
            self.__close_replica__(sock)
            return

        try:
            raw_msg = buf.decode()
        except UnicodeDecodeError:
            warn("*** Simulator Error - Could not decode message from replica, it may contain non-ASCII data")
            self.__close_replica__(sock)
            return

        # decode and validate the message
        try:
            msg = json.loads(raw_msg)
        except (ValueError, RecursionError):
            warn(f"*** Simulator Error - Unable to decode JSON message: {raw_msg}")
            self.stats.incorrect += 1
            return

        if type(msg) is not dict:
            warn(f"*** Simulator Error - Message is not a dictionary: {raw_msg}")
            self.stats.incorrect += 1
            return
        if 'src' not in msg or 'dst' not in msg or 'leader' not in msg or 'type' not in msg:
            warn(f"*** Simulator Error - Message is missing a required field: {raw_msg}")
            self.stats.incorrect += 1
            return
        if not self.__validate_addr__(msg['leader']):
            warn(f"*** Simulator Error - Incorrect leader format: {raw_msg}")
            self.stats.incorrect += 1
            return
        if not self.__validate_addr__(msg['dst']):
            warn(f"*** Simulator Error - Incorrect destination format: {raw_msg}")
            self.stats.incorrect += 1
            return
        if not self.__validate_addr__(msg['src']):
            warn(f"*** Simulator Error - Incorrect source format: {raw_msg}")
            self.stats.incorrect += 1
            return

        # if it's a hello message, use it to grab the remote port
        if msg['type'] == "hello":
            for r in self.replicas.values():
                if sock == r.socket:
                    r.remote_port = addr[1]
            return

        # record the id of the current leader
        if not self.partition or msg['src'] in self.partition:
            self.stats.add_leader(msg['leader'])
            self.leader = msg['leader']

        # is this message to a replica?
        if msg['dst'] in self.replicas:
            self.stats.total_msgs += 1
            if self.__check_partition__(msg['src'], msg['dst']) and random.random() >= self.conf.drops:
                self.__replica_deliver__(self.replicas[msg['dst']], raw_msg)
            else:
                self.stats.total_drops += 1

        # is this message a broadcast?
        elif msg['dst'] == 'FFFF':
            self.stats.total_msgs += len(self.replicas) - 1
            for rid, r in self.replicas.items():
                if rid != msg['src']:
                    if self.__check_partition__(msg['src'], rid) and random.random() >= self.conf.drops:
                        self.__replica_deliver__(r, raw_msg)
                    else:
                        self.stats.total_drops += 1

        # is this message to a client?
        elif msg['dst'] in self.clients:
            response = self.clients[msg['dst']].deliver(raw_msg, msg)
            # a retry has no destination (None) when no replica is left alive
            if response and response['dst']:
                self.__replica_deliver__(self.replicas[response['dst']], json.dumps(response))

        # we have no idea who the destination is
        else:
            warn(f"*** Simulator Error - Unknown destination: {raw_msg}")
            self.stats.incorrect += 1

    def __close_replica__(self, sock):
        """Mark the replica owning *sock* as dead and shut it down."""
        for r in self.replicas.values():
            if r.socket == sock:
                if r.rid in self.living_rids:
                    self.living_rids.remove(r.rid)
                    r.shutdown()
                break

    def correctness_check(self, ignore=(), verbose=True):
        """Run the correctness checks and return True when all of them pass.

        ignore  -- collection of check names (see CORRECTNESS_TESTS) to skip
        verbose -- print a line for each failed check and on overall success
        """
        passed = True

        # Correctness Tests -- things your key-value store must do
        if "incorrect" not in ignore and self.stats.incorrect:
            # It may not be inconsistent
            if verbose: fail('Error: >0 incorrect responses to get()')
            passed = False
        if "died" not in ignore and self.stats.died:
            # Replicas may not crash
            if verbose: fail('Error: >0 replicas died unexpectedly')
            passed = False
        if "no_state" not in ignore and sum(len(c.items) for c in self.clients.values()) == 0:
            if verbose: fail('Error: No state stored in replicas')
            passed = False
        if "get_answered" not in ignore and self.stats.unanswered_get > self.stats.generated_get * self.conf.max_get_frac:
            # Your system must answer a minimal number of get requests from clients
            if verbose: fail(f'Error: insufficient get() requests answered ({self.stats.unanswered_get} > {self.stats.generated_get} * {self.conf.max_get_frac:.2f})')
            passed = False
        if "put_answered" not in ignore and self.stats.unanswered_put > self.stats.generated_put * self.conf.max_put_frac:
            # Your system must answer a minimal number of put requests from clients
            if verbose: fail(f'Error: insufficient put() requests answered ({self.stats.unanswered_put} > {self.stats.generated_put} * {self.conf.max_put_frac:.2f})')
            passed = False
        if "get_generated" not in ignore and self.stats.failed_to_generate_get > self.stats.generated_get * self.conf.max_get_fail_frac:
            # If no put()s succeed, then no get()s can be generated. A minimum number of get()s must be generated
            if verbose: fail(f'Error: insufficient get() requests were generated because insufficient put()s were accepted ({self.stats.failed_to_generate_get} > {self.stats.generated_get} * {self.conf.max_get_fail_frac:.2f})')
            passed = False
        if "total_msgs" not in ignore and self.stats.total_msgs < self.conf.requests * self.conf.replicas * (1 - self.conf.mix) * (1 - self.conf.app_batch_frac):
            # There must be some minimal amount of traffic going between the replicas
            if verbose: fail('Error: too few messages between the replicas')
            passed = False

        if passed:
            if verbose: ok('All correctness tests passed')
        return passed

    def __perf_result__(self, test_val, tiers, metric, verbose):
        """Return the tier index (0 best .. 3 no credit) that *test_val* falls into.

        tiers -- three ascending thresholds; a value must be strictly below a
                 threshold to earn that tier
        """
        if test_val < tiers[0]:
            if verbose: win(f'{metric}: {test_val} < {tiers[0]}, Bonus!')
            return 0
        if test_val < tiers[1]:
            if verbose: ok(f'{metric}: {test_val} < {tiers[1]}, Full credit')
            return 1
        if test_val < tiers[2]:
            if verbose: warn(f'{metric}: {test_val} < {tiers[2]}, Partial credit, needs improvement')
            return 2

        if verbose: fail(f'{metric}: {test_val} >= {tiers[2]}, No credit')
        return 3

    def performance_tests(self, ignore=(), verbose=True):
        """Return the tier index of each performance metric, in PERF_TESTS order.

        `ignore` is accepted for symmetry with correctness_check() but is not
        used. Raises KeyError when the config defines no tiers for a metric.
        """
        metrics = (
            ('total_msgs', "Total Messages Between Replicas"),
            ('failures', "Total Failures and Unanswered Requests"),
            ('duplicates', "Duplicate Responses to Clients"),
            ('median_latency', "Median Response Latency to Clients"),
        )
        return [self.__perf_result__(getattr(self.stats, key), self.conf.benchmarks[key], label, verbose)
                for key, label in metrics]

    @staticmethod
    def get_num_performance_tests():
        """Return the number of performance metrics."""
        return len(PERF_TESTS)

    @staticmethod
    def get_performance_tests():
        """Return the names of the performance metrics."""
        return PERF_TESTS

    @staticmethod
    def get_num_correctness_tests():
        """Return the number of correctness checks."""
        return len(CORRECTNESS_TESTS)

    @staticmethod
    def get_correctness_tests():
        """Return the names of the correctness checks."""
        return CORRECTNESS_TESTS

#######################################################################################################

def single_test(testfile, silence=False):
    """Run one simulation and print a full report for it.

    The simulation is published through the module-level `sim` while it is
    alive and reset to None at the end. The report prints the statistics dump,
    then the correctness checks, then the performance tests if (and only if)
    the correctness checks passed.

    Args:
        testfile: config handed to Simulation (the CLI passes a config file path).
        silence: forwarded to Simulation as its second argument.
    """
    global sim

    sim = Simulation(testfile, silence)
    sim.run()
    sim.shutdown()
    stats = sim.stats

    bold("\n# Simulation Finished\n\n## Useful Information and Statistics")
    stats.dump()

    bold("\n## Correctness Checks")
    if not sim.correctness_check():
        print('\n## Correctness Checks Failed, Skipping Performance Tests')
    else:
        bold("\n## Performance Tests")
        print("## <test metric>: <your score> <benchmark score>, <test result>")
        sim.performance_tests()

    sim = None

def run_test(filename, config_dir, description, silence=False, log=None):
    """Run one config from config_dir and print a one-line PASS/FAIL summary.

    The simulation is published through the module-level `sim` while it is
    alive and reset to None afterwards. On a pass, the performance tiers are
    appended to the summary line and, if `log` is truthy, one space-separated
    record of the run's statistics is written to it. The function always sleeps
    for INTER_TEST_WAIT seconds before returning.

    Args:
        filename: config file name, joined onto config_dir.
        config_dir: directory containing the config file.
        description: human-readable test label, left-aligned to 60 columns.
        silence: forwarded to Simulation as its second argument.
        log: optional object with a write() method receiving the stats record.

    Returns:
        The result of the simulation's correctness check.
    """
    global sim

    config_path = os.path.join(config_dir, filename)
    sim = Simulation(config_path, silence)
    sim.run()
    sim.shutdown()
    stats = sim.stats

    passed = sim.correctness_check()

    if not passed:
        print(f'\t{description:<60}\t[FAIL]')
    else:
        perf = sim.performance_tests()

        if log:
            record = (
                f'{filename} {stats.total_msgs} '
                f'{stats.failed_get} {stats.unanswered_get} '
                f'{stats.failed_put} {stats.unanswered_put} '
                f'{stats.duplicates} '
                f'{stats.mean_latency} {stats.median_latency}\n'
            )
            log.write(record)

        tier_columns = ''.join(f' {tier}' for tier in perf)
        print(f'\t{description:<60}\t[PASS]\tPerformance Tiers:{tier_columns}')

    sim = None
    time.sleep(INTER_TEST_WAIT)

    return passed

def all_tests(config_dir, silence=False, leaderboard=False):
    """Run the full test suite from config_dir and print a pass count.

    Tests run group by group in a fixed order; each group heading is printed
    before its tests, and each test is executed through run_test.

    Args:
        config_dir: directory containing the test config files.
        silence: overwritten with True on entry (see note below).
        leaderboard: overwritten with True on entry and not read afterwards.
    """
    # NOTE(review): both parameters are unconditionally overwritten here, so the
    # values passed by the caller have no effect. Preserved as-is; confirm
    # whether forcing these is intentional.
    silence = True
    leaderboard = True

    # (group heading, ((config file name, description), ...)) in execution order.
    suites = (
        ('Basic tests:', (
            ('simple-1.json', 'No drops, no failures, 80% read'),
            ('simple-2.json', 'No drops, no failures, 20% read'),
        )),
        ('Unreliable network tests:', (
            ('unreliable-1.json', '5% drops, no failures, 20% read'),
            ('unreliable-2.json', '10% drops, no failures, 20% read'),
            ('unreliable-3.json', '15% drops, no failures, 20% read'),
        )),
        ('Crash failure tests:', (
            ('crash-1.json', 'No drops, 1 replica failure, 20% read'),
            ('crash-2.json', 'No drops, 2 replica failures, 20% read'),
            ('crash-3.json', 'No drops, 1 leader failure, 20% read'),
            ('crash-4.json', 'No drops, 2 leader failures, 20% read'),
        )),
        ('Partition tests:', (
            ('partition-1.json', 'No drops, 1 easy partition, 20% read'),
            ('partition-2.json', 'No drops, 2 easy partitions, 20% read'),
            ('partition-3.json', 'No drops, 1 hard partition, 20% read'),
            ('partition-4.json', 'No drops, 2 hard partitions, 20% read'),
        )),
        ('Advanced tests:', (
            ('advanced-1.json', '10% drops, 2 hard partitions and 1 leader failures, 20% read'),
            ('advanced-2.json', '15% drops, 2 leader failures, 20% read'),
            ('advanced-3.json', '30% drops, 1 leader failure, 20% read'),
            ('advanced-4.json', '10% drops, 3 hard partions and 1 leader kill, 20% read'),
        )),
    )

    trials = []
    for heading, cases in suites:
        print(heading)
        for config_name, description in cases:
            trials.append(run_test(config_name, config_dir, description, silence))

    passed_count = sum(1 for outcome in trials if outcome)
    print('Passed', passed_count, 'out of', len(trials), 'tests')

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, arrange for replica
    # cleanup at interpreter exit, then run either the whole suite or one test.

    parser = argparse.ArgumentParser()
    parser.add_argument("--replica",
                        "-r",
                        type=str,
                        # Use the constant so the real default cannot drift from
                        # the value advertised in the help text.
                        default=REPLICA_PROG,
                        help=f'Fully qualified path to your replica program (Default: {REPLICA_PROG})')
    parser.add_argument("--silence",
                        "-s",
                        action='store_true',
                        help='Pipe stdout and stderr of replicas to /dev/null. (Default: False)')
    parser.add_argument("--leaderboard",
                        "-l",
                        action='store_true',
                        help='Store your test report in the leaderboard. Only works when running "all" tests, and on the Khoury machines. (Default: False)')
    parser.add_argument("--config_directory",
                        "-c",
                        type=str,
                        default='configs/',
                        help='Path to a directory containing test configs. Only used when running "all" tests. (Default: ./configs/)')
    parser.add_argument("test",
                        type=str,
                        help='Path to a test config file, or "all" to run all tests.')

    args = parser.parse_args()

    REPLICA_PROG = args.replica
    sim = None

    def kill_processes():
        """Best-effort shutdown of a simulation still referenced by `sim` at exit."""
        global sim

        if sim:
            try:
                sim.shutdown()
            except Exception:
                # Cleanup at exit is best-effort: ordinary errors are ignored,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                pass

    # Attempt to kill child processes regardless of how Python shuts down (e.g., via an exception or ctrl-C)
    atexit.register(kill_processes)

    if args.test == ALL_TESTS:
        # The parsed --leaderboard flag is deliberately not forwarded here.
        all_tests(args.config_directory, args.silence, False)  # args.leaderboard
    else:
        single_test(args.test, args.silence)