Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: windows support #63

Merged
merged 34 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
844d084
Windows support!
May 24, 2024
835b639
Fixed linux build
May 24, 2024
d059217
Small fix
May 24, 2024
90e7a6d
Removed bin dir output from makefile
May 24, 2024
84e19db
Update .gitignore
DifferentialityDevelopment May 25, 2024
0499f95
Update .gitignore
DifferentialityDevelopment May 25, 2024
0c2a849
refactor: Added isEagainError function to socket.cpp
May 25, 2024
48fa75f
Refactored some more code in socket.cpp
May 25, 2024
7ef3eeb
Refactored makefile
May 25, 2024
5201f57
Trying to resolve .gitignore conflict?
May 25, 2024
ea175d5
Some more refactoring
May 25, 2024
dbde6ca
Refactored utils.cpp
May 25, 2024
6f960a6
Updated readme
May 27, 2024
92860f2
Add windows build to workflow file
May 27, 2024
618a33e
Updated main.yml
May 27, 2024
daea68f
Another attempt
May 27, 2024
044ca10
Fix syntax
May 27, 2024
4630f9b
Small tweak
May 27, 2024
44448ca
Trying to get it to work
May 27, 2024
4fb52ca
Testing
May 27, 2024
093e1b8
Trying to correct script path
May 27, 2024
094c8da
Moved scripts to scripts folder and made dependencies.sh executable
May 27, 2024
3d188ce
Removed old path
May 27, 2024
b16804f
Another fix
May 27, 2024
fc729a1
install_dependencies.
b4rtaz May 27, 2024
0efc59e
install_dependencies.
b4rtaz May 27, 2024
596121e
newBuffer & freeBuffer.
b4rtaz May 27, 2024
1248d2b
mmap file.
b4rtaz May 27, 2024
c15d643
fix: include.
b4rtaz May 27, 2024
ab06abe
polishing.
b4rtaz May 27, 2024
8e16486
main.yml.
b4rtaz May 27, 2024
2afbede
main.yml.
b4rtaz May 27, 2024
5e84e46
cleaning.
b4rtaz May 27, 2024
589519b
polishing.
b4rtaz May 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions .github/workflows/main.yml
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't know how I swapped those two around! Must have been by accident.

Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ on:
branches:
- main
jobs:
build:
name: Build
build-linux:
name: Linux
runs-on: ${{matrix.os}}
strategy:
matrix:
Expand All @@ -22,9 +22,37 @@ jobs:
uses: actions/checkout@v3
- name: Dependencies
id: dependencies
run: sudo apt-get update && sudo apt-get install build-essential
- name: Build
id: build
run: |
sudo apt-get update
sudo apt-get install build-essential
make dllama
make dllama-api
make funcs-test
make quants-test
make transformer-test
make llama2-tasks-test
make grok1-tasks-test
- name: funcs-test
run: ./funcs-test
- name: quants-test
run: ./quants-test
- name: transformer-test
run: ./transformer-test
- name: llama2-tasks-test
run: ./llama2-tasks-test
- name: grok1-tasks-test
run: ./grok1-tasks-test

build-windows:
name: Windows
runs-on: windows-latest
steps:
- name: Checkout Repo
uses: actions/checkout@v3
- name: Dependencies
id: dependencies
run: choco install make
- name: Build
id: build
run: |
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
*.o
*.dSYM
*.data
*.bin
__pycache__

*-test
main
run.sh
run*.sh
server
/dllama
/dllama-*
*.exe
22 changes: 15 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
CXX = g++
CXXFLAGS = -std=c++11 -Werror -O3 -march=native -mtune=native

# Conditional settings for Windows
ifeq ($(OS),Windows_NT)
LIBS = -lws2_32 # or -lpthreadGC2 if needed
else
LIBS = -lpthread
endif

utils: src/utils.cpp
$(CXX) $(CXXFLAGS) -c src/utils.cpp -o utils.o
quants: src/quants.cpp
Expand All @@ -27,16 +34,17 @@ app: src/app.cpp
$(CXX) $(CXXFLAGS) -c src/app.cpp -o app.o

dllama: src/apps/dllama/dllama.cpp utils quants funcs socket transformer tasks llama2-tasks grok1-tasks mixtral-tasks tokenizer app
$(CXX) $(CXXFLAGS) src/apps/dllama/dllama.cpp -o dllama utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o -lpthread
$(CXX) $(CXXFLAGS) src/apps/dllama/dllama.cpp -o dllama utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o $(LIBS)
dllama-api: src/apps/dllama-api/dllama-api.cpp utils quants funcs socket transformer tasks llama2-tasks grok1-tasks mixtral-tasks tokenizer app
$(CXX) $(CXXFLAGS) src/apps/dllama-api/dllama-api.cpp -o dllama-api utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o -lpthread
$(CXX) $(CXXFLAGS) src/apps/dllama-api/dllama-api.cpp -o dllama-api utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o mixtral-tasks.o tokenizer.o app.o $(LIBS)

funcs-test: src/funcs-test.cpp funcs utils quants
$(CXX) $(CXXFLAGS) src/funcs-test.cpp -o funcs-test funcs.o utils.o quants.o -lpthread
$(CXX) $(CXXFLAGS) src/funcs-test.cpp -o funcs-test funcs.o utils.o quants.o $(LIBS)
quants-test: src/quants.cpp utils quants
$(CXX) $(CXXFLAGS) src/quants-test.cpp -o quants-test utils.o quants.o -lpthread
$(CXX) $(CXXFLAGS) src/quants-test.cpp -o quants-test utils.o quants.o $(LIBS)
transformer-test: src/transformer-test.cpp funcs utils quants transformer socket
$(CXX) $(CXXFLAGS) src/transformer-test.cpp -o transformer-test funcs.o utils.o quants.o transformer.o socket.o -lpthread
$(CXX) $(CXXFLAGS) src/transformer-test.cpp -o transformer-test funcs.o utils.o quants.o transformer.o socket.o $(LIBS)
llama2-tasks-test: src/llama2-tasks-test.cpp utils quants funcs socket transformer tasks llama2-tasks tokenizer
$(CXX) $(CXXFLAGS) src/llama2-tasks-test.cpp -o llama2-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o tokenizer.o -lpthread
$(CXX) $(CXXFLAGS) src/llama2-tasks-test.cpp -o llama2-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o tokenizer.o $(LIBS)
grok1-tasks-test: src/grok1-tasks-test.cpp utils quants funcs socket transformer tasks llama2-tasks grok1-tasks tokenizer
$(CXX) $(CXXFLAGS) src/grok1-tasks-test.cpp -o grok1-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o tokenizer.o -lpthread
$(CXX) $(CXXFLAGS) src/grok1-tasks-test.cpp -o grok1-tasks-test utils.o quants.o funcs.o socket.o transformer.o tasks.o llama2-tasks.o grok1-tasks.o tokenizer.o $(LIBS)
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,11 @@ To add more worker nodes, just add more addresses to the `--workers` argument.

[Share your results](https://github.com/b4rtaz/distributed-llama/discussions)!

## 💻 How to Run on MacOS or Linux
## 💻 How to Run on MacOS, Linux, or Windows

You need to have x86_64 AVX2 CPU or ARM CPU. Different devices may have different CPUs. The below instructions are for Debian-based distributions but you can easily adapt them to your distribution or macOS.
You need to have x86_64 AVX2 CPU or ARM CPU. Different devices may have different CPUs. The below instructions are for Debian-based distributions but you can easily adapt them to your distribution, macOS, or Windows.

### MacOS and Linux

1. Install Git and G++:
```sh
Expand Down Expand Up @@ -182,6 +184,35 @@ sudo nice -n -20 ./dllama inference --model ../dllama_llama-2-7b_q40.bin --token
sudo nice -n -20 ./dllama chat --model ../dllama_llama-2-7b-chat_q40.bin --tokenizer ../dllama-llama2-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --nthreads 4 --workers 192.168.0.1:9998
```

### Windows

1. Install Git and Mingw (Chocolatey):
- https://chocolatey.org/install
```powershell
choco install mingw
```
2. Clone this repository:
```sh
git clone https://github.com/b4rtaz/distributed-llama.git
```
3. Compile Distributed Llama:
```sh
make dllama
```
4. Transfer weights and the tokenizer file to the root node.
5. Run worker nodes on worker devices:
```sh
./dllama worker --port 9998 --nthreads 4
```
6. Run root node on the root device:
```sh
./dllama inference --model ../dllama_llama-2-7b_q40.bin --tokenizer ../dllama-llama2-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --prompt "Hello world" --steps 16 --nthreads 4 --workers 192.168.0.1:9998
```
7. To run the root node in the chat mode:
```sh
./dllama chat --model ../dllama_llama-2-7b-chat_q40.bin --tokenizer ../dllama-llama2-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --nthreads 4 --workers 192.168.0.1:9998
```

[Share your results](https://github.com/b4rtaz/distributed-llama/discussions)!

## 💡 License
Expand Down
1 change: 1 addition & 0 deletions src/app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <cstdio>
#include <cassert>
#include <stdexcept>
#include <ctime>
#include "app.hpp"

FloatType parseFloatType(char* val) {
Expand Down
8 changes: 7 additions & 1 deletion src/apps/dllama-api/dllama-api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
#include <cassert>
#include <sstream>
#include <iostream>
#include <vector>

#ifdef _WIN32
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <vector>
#endif

#include "../../utils.hpp"
#include "../../socket.hpp"
Expand Down
40 changes: 40 additions & 0 deletions src/common/pthread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef PTHREAD_WRAPPER
#define PTHREAD_WRAPPER

#ifdef _WIN32
#include <windows.h>
#include <errno.h> /* required: pthread_create below returns EAGAIN; <windows.h>
                      does not reliably provide it transitively */

/* Minimal pthread-style shim over the Win32 threading API.
 * Only the subset this project uses (create + join) is implemented. */
typedef HANDLE dl_thread;
typedef DWORD thread_ret_t;
typedef DWORD (WINAPI *thread_func_t)(void *);

/* Mirrors pthread_create(3): starts `func(arg)` on a new thread.
 * `unused` stands in for the pthread_attr_t* argument and is ignored.
 * Returns 0 on success, EAGAIN if the thread could not be created. */
static int pthread_create(dl_thread * out, void * unused, thread_func_t func, void * arg) {
    (void) unused;
    dl_thread handle = CreateThread(NULL, 0, func, arg, 0, NULL);
    if (handle == NULL) {
        return EAGAIN;
    }

    *out = handle;
    return 0;
}

/* Mirrors pthread_join(3): blocks until `thread` terminates, then releases
 * its handle. The thread's exit value is discarded (`unused` is ignored).
 * Returns 0 on success, -1 if the wait failed (handle is NOT closed then,
 * matching the original behavior). */
static int pthread_join(dl_thread thread, void * unused) {
    (void) unused;
    DWORD ret = WaitForSingleObject(thread, INFINITE);
    if (ret == WAIT_FAILED) {
        return -1;
    }
    CloseHandle(thread);
    return 0;
}
#else
#include <pthread.h>

/* On POSIX platforms the shim is just aliases onto real pthreads. */
typedef pthread_t dl_thread;
typedef void* thread_ret_t;
typedef void* (*thread_func_t)(void *);

#endif

#endif // PTHREAD_WRAPPER
4 changes: 2 additions & 2 deletions src/funcs.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <cmath>
#include <cassert>
#include <cstdio>
#include <pthread.h>
#include "common/pthread.h"
#include "quants.hpp"
#include "funcs.hpp"

Expand Down Expand Up @@ -145,7 +145,7 @@ void rmsnorm(float* o, const float* x, const float ms, const float* weight, cons
}

struct MatmulThreadInfo {
pthread_t handler;
dl_thread handler;
float* output;
const void* input;
const void* weights;
Expand Down
Loading
Loading