Skip to content

Commit

Permalink
feat: windows support. (#63)
Browse files Browse the repository at this point in the history
  • Loading branch information
DifferentialityDevelopment authored May 27, 2024
1 parent 83745fd commit 2fa9d9f
Show file tree
Hide file tree
Showing 14 changed files with 354 additions and 71 deletions.
36 changes: 32 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ on:
branches:
- main
jobs:
build:
name: Build
build-linux:
name: Linux
runs-on: ${{matrix.os}}
strategy:
matrix:
Expand All @@ -22,9 +22,37 @@ jobs:
uses: actions/checkout@v3
- name: Dependencies
id: dependencies
run: sudo apt-get update && sudo apt-get install build-essential
- name: Build
id: build
run: |
sudo apt-get update
sudo apt-get install build-essential
make dllama
make dllama-api
make funcs-test
make quants-test
make transformer-test
make llama2-tasks-test
make grok1-tasks-test
- name: funcs-test
run: ./funcs-test
- name: quants-test
run: ./quants-test
- name: transformer-test
run: ./transformer-test
- name: llama2-tasks-test
run: ./llama2-tasks-test
- name: grok1-tasks-test
run: ./grok1-tasks-test

build-windows:
name: Windows
runs-on: windows-latest
steps:
- name: Checkout Repo
uses: actions/checkout@v3
- name: Dependencies
id: dependencies
run: choco install make
- name: Build
id: build
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ run*.sh
server
/dllama
/dllama-*
*.exe
22 changes: 15 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
CXX = g++
CXXFLAGS = -std=c++11 -Werror -O3 -march=native -mtune=native

# Conditional settings for Windows
ifeq ($(OS),Windows_NT)
LIBS = -lws2_32 # or -lpthreadGC2 if needed
else
LIBS = -lpthread
endif

utils: src/utils.cpp
$(CXX) $(CXXFLAGS) -c src/utils.cpp -o utils.o
quants: src/quants.cpp
Expand All @@ -27,16 +34,17 @@ app: src/app.cpp
$(CXX) $(CXXFLAGS) -c src/app.cpp -o app.o

dllama: src/apps/dllama/dllama.cpp utils quants funcs socket transformer tasks llama2-tasks grok1-tasks mixtral-tasks tokenizer app
$(CXX) $(CXXFLAGS) src/apps/dllama/dllama.cpp -o dllama utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o -lpthread
$(CXX) $(CXXFLAGS) src/apps/dllama/dllama.cpp -o dllama utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o $(LIBS)
dllama-api: src/apps/dllama-api/dllama-api.cpp utils quants funcs socket transformer tasks llama2-tasks grok1-tasks mixtral-tasks tokenizer app
$(CXX) $(CXXFLAGS) src/apps/dllama-api/dllama-api.cpp -o dllama-api utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o -lpthread
$(CXX) $(CXXFLAGS) src/apps/dllama-api/dllama-api.cpp -o dllama-api utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o $(LIBS)

funcs-test: src/funcs-test.cpp funcs utils quants
$(CXX) $(CXXFLAGS) src/funcs-test.cpp -o funcs-test funcs.o utils.o quants.o -lpthread
$(CXX) $(CXXFLAGS) src/funcs-test.cpp -o funcs-test funcs.o utils.o quants.o $(LIBS)
quants-test: src/quants.cpp utils quants
$(CXX) $(CXXFLAGS) src/quants-test.cpp -o quants-test utils.o quants.o -lpthread
$(CXX) $(CXXFLAGS) src/quants-test.cpp -o quants-test utils.o quants.o $(LIBS)
transformer-test: src/transformer-test.cpp funcs utils quants transformer socket
$(CXX) $(CXXFLAGS) src/transformer-test.cpp -o transformer-test funcs.o utils.o quants.o transformer.o socket.o -lpthread
$(CXX) $(CXXFLAGS) src/transformer-test.cpp -o transformer-test funcs.o utils.o quants.o transformer.o socket.o $(LIBS)
llama2-tasks-test: src/llama2-tasks-test.cpp utils quants funcs socket transformer tasks llama2-tasks tokenizer
$(CXX) $(CXXFLAGS) src/llama2-tasks-test.cpp -o llama2-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o tokenizer.o -lpthread
$(CXX) $(CXXFLAGS) src/llama2-tasks-test.cpp -o llama2-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o tokenizer.o $(LIBS)
grok1-tasks-test: src/grok1-tasks-test.cpp utils quants funcs socket transformer tasks llama2-tasks grok1-tasks tokenizer
$(CXX) $(CXXFLAGS) src/grok1-tasks-test.cpp -o grok1-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o tokenizer.o -lpthread
$(CXX) $(CXXFLAGS) src/grok1-tasks-test.cpp -o grok1-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o tokenizer.o $(LIBS)
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,11 @@ To add more worker nodes, just add more addresses to the `--workers` argument.

[Share your results](https://github.com/b4rtaz/distributed-llama/discussions)!

## 💻 How to Run on MacOS or Linux
## 💻 How to Run on MacOS, Linux, or Windows

You need to have x86_64 AVX2 CPU or ARM CPU. Different devices may have different CPUs. The below instructions are for Debian-based distributions but you can easily adapt them to your distribution or macOS.
You need to have x86_64 AVX2 CPU or ARM CPU. Different devices may have different CPUs. The below instructions are for Debian-based distributions but you can easily adapt them to your distribution, macOS, or Windows.

### MacOS and Linux

1. Install Git and G++:
```sh
Expand Down Expand Up @@ -188,6 +190,35 @@ sudo nice -n -20 ./dllama inference --model ../dllama_llama-2-7b_q40.bin --token
sudo nice -n -20 ./dllama chat --model ../dllama_llama-2-7b-chat_q40.bin --tokenizer ../dllama-llama2-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --nthreads 4 --workers 192.168.0.1:9998
```

### Windows

1. Install Git and MinGW (via Chocolatey):
- https://chocolatey.org/install
```powershell
choco install mingw
```
2. Clone this repository:
```sh
git clone https://github.com/b4rtaz/distributed-llama.git
```
3. Compile Distributed Llama:
```sh
make dllama
```
4. Transfer weights and the tokenizer file to the root node.
5. Run worker nodes on worker devices:
```sh
./dllama worker --port 9998 --nthreads 4
```
6. Run root node on the root device:
```sh
./dllama inference --model ../dllama_llama-2-7b_q40.bin --tokenizer ../dllama-llama2-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --prompt "Hello world" --steps 16 --nthreads 4 --workers 192.168.0.1:9998
```
7. To run the root node in the chat mode:
```sh
./dllama chat --model ../dllama_llama-2-7b-chat_q40.bin --tokenizer ../dllama-llama2-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --nthreads 4 --workers 192.168.0.1:9998
```

[Share your results](https://github.com/b4rtaz/distributed-llama/discussions)!

## 💡 License
Expand Down
1 change: 1 addition & 0 deletions src/app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <cstdio>
#include <cassert>
#include <stdexcept>
#include <ctime>
#include "app.hpp"

FloatType parseFloatType(char* val) {
Expand Down
8 changes: 7 additions & 1 deletion src/apps/dllama-api/dllama-api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
#include <cassert>
#include <sstream>
#include <iostream>
#include <vector>

#ifdef _WIN32
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <vector>
#endif

#include "types.hpp"
#include "../../utils.hpp"
Expand Down
40 changes: 40 additions & 0 deletions src/common/pthread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef PTHREAD_WRAPPER
#define PTHREAD_WRAPPER

// Minimal pthread compatibility shim: on Windows it maps pthread_create /
// pthread_join onto the Win32 thread API; on POSIX it simply aliases the
// native pthread types. Callers use dl_thread / thread_ret_t / thread_func_t
// instead of the pthread types directly so the same code builds everywhere.

#ifdef _WIN32
#include <windows.h>
#include <errno.h> // required for EAGAIN; <windows.h> does not define it

typedef HANDLE dl_thread;
typedef DWORD thread_ret_t;
typedef DWORD (WINAPI *thread_func_t)(void *);

// Creates a thread running `func(arg)`.
// `unused` mirrors the pthread attr parameter and is ignored.
// Returns 0 on success, EAGAIN if the thread could not be created
// (matching pthread_create's error convention).
static int pthread_create(dl_thread * out, void * unused, thread_func_t func, void * arg) {
    (void) unused;
    dl_thread handle = CreateThread(NULL, 0, func, arg, 0, NULL);
    if (handle == NULL) {
        return EAGAIN;
    }

    *out = handle;
    return 0;
}

// Waits for `thread` to finish and releases its handle.
// `unused` mirrors the pthread retval out-parameter and is ignored
// (the thread's return value is discarded).
// Returns 0 on success, -1 if the wait failed (handle is NOT closed then).
static int pthread_join(dl_thread thread, void * unused) {
    (void) unused;
    DWORD ret = WaitForSingleObject(thread, INFINITE);
    if (ret == WAIT_FAILED) {
        return -1;
    }
    CloseHandle(thread);
    return 0;
}
#else
#include <pthread.h>

// On POSIX, defer entirely to the native pthread API; only the
// portable type aliases are provided.
typedef pthread_t dl_thread;
typedef void* thread_ret_t;
typedef void* (*thread_func_t)(void *);

#endif

#endif // PTHREAD_WRAPPER
4 changes: 2 additions & 2 deletions src/funcs.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <cmath>
#include <cassert>
#include <cstdio>
#include <pthread.h>
#include "common/pthread.h"
#include "quants.hpp"
#include "funcs.hpp"

Expand Down Expand Up @@ -145,7 +145,7 @@ void rmsnorm(float* o, const float* x, const float ms, const float* weight, cons
}

struct MatmulThreadInfo {
pthread_t handler;
dl_thread handler;
float* output;
const void* input;
const void* weights;
Expand Down
Loading

0 comments on commit 2fa9d9f

Please sign in to comment.