-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
chenkui164
committed
Jul 8, 2022
1 parent
0cd0001
commit 5637dad
Showing
42 changed files
with
8,261 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Build script for the fastasr executable (GNU make).
# Every .cpp found under $(SRC_PATH) (at most 3 directory levels deep) is
# compiled to obj/<name>.o and all objects are linked into ./fastasr.
SRC_PATH = ./src
DIRS = $(shell find $(SRC_PATH) -maxdepth 3 -type d)
SRCS_CXX += $(foreach dir, $(DIRS), $(wildcard $(dir)/*.cpp))
# Lets the pattern rules below find a source file in any scanned directory.
VPATH = $(DIRS)
# Objects are flattened into obj/ via notdir, so source base names must be unique.
OBJECTS=$(addprefix obj/,$(notdir $(SRCS_CXX:.cpp=.o)))
DEPENDS=$(OBJECTS:.o=.d)
CXXFLAGS+=-O3 -std=c++11
# CXXFLAGS+=-Wall -fdiagnostics-color=auto -std=c++11 -fext-numeric-literals -O2 -pthread -fopenmp -Wl,-rpath-link=/home/ck/work/am_msk/libs/lib

# Cross compiler for ARM targets; this path is specific to one machine.
ARM_CXX=/home/ck/work/am_msk/cross-pi-gcc-8.3.0-2/bin/arm-linux-gnueabihf-g++
PC_CXX=g++

# Toolchain selection: assign $(ARM_CXX) here to cross-compile.
CXX=$(PC_CXX)

LDLIBS=-lm -lfftw3f -lopenblas

# Optional extra search paths:
# LDPATH+=-L/home/ck/work/am_msk/libs/lib
# INCLUDEPATH+=-I /home/ck/work/am_msk/libs/include

# First target in the file, therefore the default goal.
fastasr : $(OBJECTS)
	$(CXX) -o fastasr $(CXXFLAGS) $(OBJECTS) $(LDLIBS) $(LDPATH)

obj/%.o:%.cpp
	@mkdir -p obj
	$(CXX) -c -o $@ $< $(CXXFLAGS) $(INCLUDEPATH) $(PREDEFINE)

# Generates obj/<name>.d listing the headers <name>.cpp depends on. The same
# compiler and flags as the compile rule are used so that the preprocessor sees
# identical include paths, defines and language standard. sed rewrites the
# "<name>.o:" target emitted by -MM into "obj/<name>.o obj/<name>.d :" so that
# both the object and the dependency file are refreshed when a header changes.
obj/%.d:%.cpp
	@set -e; mkdir -p obj; rm -f $@; \
	$(CXX) -MM $< $(CXXFLAGS) $(INCLUDEPATH) $(PREDEFINE) > $@.$$$$; \
	sed 's,\($*\)\.o[ :]*,obj/\1.o $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

# Dependency files are pointless for "make clean"; skipping the include there
# avoids regenerating them only to delete them again.
ifneq ($(MAKECMDGOALS),clean)
include $(DEPENDS)
endif

.PHONY : clean install

install : fastasr
	expect ./download.ext

clean:
	@rm -f obj/*
	@rm -f fastasr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
# FastASR | ||
基于PaddleSpeech所使用的conformer模型,使用C++的高效实现模型推理,在树莓派4B等ARM平台运行也可流畅运行。 | ||
hello |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
#ifndef AUDIO_H | ||
#define AUDIO_H | ||
|
||
|
||
#include <stdint.h> | ||
|
||
#include "Tensor.h" | ||
|
||
class Audio { | ||
private: | ||
int16_t *speech; | ||
int speech_len; | ||
int16_t sample_rate; | ||
|
||
void loadwav(const char *filename); | ||
void audio2feature(); | ||
void melspect(float *din, float *dout); | ||
void global_cmvn(float *din); | ||
|
||
public: | ||
Tensor<float> *fbank_feature; | ||
Audio(const char *filename); | ||
~Audio(); | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
#include "CTCDecode.h" | ||
#include "util.h" | ||
#include <cblas.h> | ||
#include <cmath> | ||
#include <iostream> | ||
#include <map> | ||
#include <set> | ||
#include <string.h> | ||
using namespace std; | ||
|
||
// Width of one CTC output row: number of vocabulary entries scored per frame.
// A typed constant rather than a macro, so the name obeys normal scoping rules.
constexpr int vocab_size = 5537;
|
||
// Stores the two parameter pointers as-is; the data they point to is not
// copied, so it must outlive this object.
CTCdecode::CTCdecode(float *ctc_weight, float *ctc_bias)
    : ctc_weight(ctc_weight), ctc_bias(ctc_bias) {}
|
||
// Nothing to release: the weight and bias pointers are only borrowed.
CTCdecode::~CTCdecode() {}
|
||
/**
 * Numerically stable log-sum-exp: returns log(sum_i exp(din[i])).
 *
 * The largest element is factored out before exponentiating, so inputs far
 * below zero (e.g. accumulated log-probabilities of long paths) no longer
 * underflow to exp() == 0 and collapse the result to -inf.
 *
 * @param din  array of log-domain values
 * @param len  number of elements in din
 * @return     log of the summed exponentials; -inf when len <= 0 or when every
 *             element is -inf
 */
float log_add(float *din, int len)
{
    if (len <= 0) {
        return -INFINITY; // empty sum: log(0)
    }

    float max_val = din[0];
    for (int i = 1; i < len; i++) {
        if (din[i] > max_val) {
            max_val = din[i];
        }
    }

    // All -inf (or a +inf present): subtracting max_val would yield NaN,
    // and the result equals max_val anyway.
    if (std::isinf(max_val)) {
        return max_val;
    }

    float sum = 0;
    for (int i = 0; i < len; i++) {
        sum += std::exp(din[i] - max_val);
    }
    return max_val + std::log(sum);
}
|
||
auto char_cmp = [](CharProb a, CharProb b) { return a.prob < b.prob; }; | ||
auto path_cmp = [](PathProb a, PathProb b) { return a.prob < b.prob; }; | ||
|
||
void topk(float *din, int len, set<CharProb, decltype(char_cmp)> &s) | ||
{ | ||
int i; | ||
for (i = 0; i < 10; i++) { | ||
CharProb tmp; | ||
tmp.char_idx = i; | ||
tmp.prob = din[i]; | ||
s.insert(tmp); | ||
} | ||
|
||
float min = s.begin()->prob; | ||
|
||
for (; i < len; i++) { | ||
if (din[i] > min) { | ||
s.erase(s.begin()); | ||
CharProb tmp; | ||
tmp.char_idx = i; | ||
tmp.prob = din[i]; | ||
s.insert(tmp); | ||
min = s.begin()->prob; | ||
} | ||
} | ||
} | ||
|
||
/**
 * CTC prefix beam search over per-frame log-probabilities.
 *
 * @param din   tensor whose buffer holds din->size[2] rows of vocab_size
 *              log-probabilities each
 * @param hyps  receives the surviving hypotheses, best (highest prob) first
 *
 * Every hypothesis tracks two log-scores: pb (paths ending in index 0) and
 * pnb (paths ending in any other index); prob is their log-sum. Index 0 never
 * extends a prefix - presumably it is the CTC blank symbol.
 */
void ctc_beam_search(Tensor<float> *din, deque<PathProb> &hyps)
{
    const int tmax = din->size[2];
    const int beam_size = 10;

    set<PathProb, decltype(path_cmp)> curr_hyps_set(path_cmp);
    // Start from the empty prefix with log-probability 0 on the "blank" side.
    PathProb initial;
    initial.pb = 0;
    initial.prob = 0;
    curr_hyps_set.insert(initial);

    for (int t = 0; t < tmax; t++) {
        set<CharProb, decltype(char_cmp)> char_set(char_cmp);
        topk(din->buff + t * vocab_size, vocab_size, char_set);

        // Candidate hypotheses for the next frame, merged by prefix.
        map<vector<int>, PathProb> next_map;
        for (const CharProb &cand : char_set) {
            const int char_idx = cand.char_idx;
            const float char_prob = cand.prob;

            for (const PathProb &hyp : curr_hyps_set) {
                const vector<int> &curr_prefix = hyp.prefix;
                const int last = curr_prefix.empty() ? -1 : curr_prefix.back();

                if (char_idx == 0) {
                    // Index 0 keeps the prefix and feeds its pb score.
                    PathProb &next = next_map[curr_prefix];
                    float terms[] = {next.pb, hyp.pb + char_prob,
                                     hyp.pnb + char_prob};
                    next.pb = log_add(terms, 3);
                    next.prefix = curr_prefix;
                    continue;
                }

                vector<int> next_prefix(curr_prefix);
                next_prefix.push_back(char_idx);

                if (last == char_idx) {
                    {
                        // Repeat without an intervening index 0: collapses
                        // into the same prefix.
                        PathProb &same = next_map[curr_prefix];
                        float terms[] = {same.pnb, hyp.pnb + char_prob};
                        same.pnb = log_add(terms, 2);
                        same.prefix = curr_prefix;
                    }
                    {
                        // Repeat after index 0: a genuinely new character.
                        PathProb &extended = next_map[next_prefix];
                        float terms[] = {extended.pnb, hyp.pb + char_prob};
                        extended.pnb = log_add(terms, 2);
                        extended.prefix = next_prefix;
                    }
                } else {
                    PathProb &extended = next_map[next_prefix];
                    float terms[] = {extended.pnb, hyp.pb + char_prob,
                                     hyp.pnb + char_prob};
                    extended.pnb = log_add(terms, 3);
                    extended.prefix = next_prefix;
                }
            }
        }

        // Prune: keep only the beam_size best candidates for the next frame.
        curr_hyps_set.clear();
        for (auto &entry : next_map) {
            PathProb &cand = entry.second;
            float terms[] = {cand.pb, cand.pnb};
            cand.prob = log_add(terms, 2);

            if (curr_hyps_set.size() < static_cast<size_t>(beam_size)) {
                curr_hyps_set.insert(cand);
            } else if (curr_hyps_set.begin()->prob < cand.prob) {
                // Evict the weakest entry only when the insert succeeded, so
                // the beam never shrinks because of a rejected duplicate.
                if (curr_hyps_set.insert(cand).second) {
                    curr_hyps_set.erase(curr_hyps_set.begin());
                }
            }
        }
    }

    // The set iterates in ascending order; push_front reverses it so that the
    // best hypothesis ends up at hyps.front().
    for (const PathProb &hyp : curr_hyps_set) {
        hyps.push_front(hyp);
    }
}
|
||
/**
 * Debug helper: prints every hypothesis (prefix length, prefix values and the
 * pb / pnb / prob scores) to stdout.
 *
 * An empty prefix is printed as "[]"; the previous implementation read
 * prefix[0] in that case, which is out of bounds.
 */
void CTCdecode::show_hyps(deque<PathProb> hyps)
{
    for (const PathProb &hyp : hyps) {
        const int len = static_cast<int>(hyp.prefix.size());
        printf("prefix len is %d, val is [", len);
        for (int i = 0; i < len; i++) {
            if (i > 0) {
                printf(",");
            }
            printf("%d", hyp.prefix[i]);
        }
        printf("]\n");
        printf("pb is %f, pnb is %f, prob is %f\n", hyp.pb, hyp.pnb,
               hyp.prob);
    }
}
|
||
/**
 * Projects the input onto the vocabulary, applies log-softmax per row and
 * runs the beam search.
 *
 * @param din   input tensor; din->size[2] rows of 512 floats are read
 * @param hyps  receives the hypotheses produced by ctc_beam_search()
 */
void CTCdecode::forward(Tensor<float> *din, deque<PathProb> &hyps)
{
    const int frames = din->size[2];
    const int feat_dim = 512; // row width of the input and of ctc_weight's inner dimension

    // Pre-load every output row with the bias; sgemm below accumulates onto
    // it (beta == 1).
    Tensor<float> ctcin(frames, vocab_size);
    for (int row = 0; row < frames; ++row) {
        memcpy(ctcin.buff + row * vocab_size, ctc_bias,
               sizeof(float) * vocab_size);
    }

    // ctcin = din * ctc_weight + ctcin
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, frames, vocab_size,
                feat_dim, 1, din->buff, feat_dim, ctc_weight, vocab_size, 1,
                ctcin.buff, vocab_size);

    for (int row = 0; row < frames; ++row) {
        log_softmax(ctcin.buff + row * vocab_size, vocab_size);
    }

    ctc_beam_search(&ctcin, hyps);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
|
||
#ifndef CTCDECODE_H | ||
#define CTCDECODE_H | ||
|
||
#include <deque> | ||
#include <math.h> | ||
#include <stdint.h> | ||
#include <vector> | ||
|
||
#include "Tensor.h" | ||
#include "WenetParams.h" | ||
|
||
using namespace std; | ||
|
||
/// An index paired with the score it received.
struct CharProb {
    int char_idx; ///< position of the entry within its score row
    float prob;   ///< score of that entry
};
|
||
/// One decoding hypothesis: an index sequence plus three scores, each of which
/// starts at -infinity.
struct PathProb {
    std::vector<int> prefix; ///< index sequence of this hypothesis
    float pb = -INFINITY;    ///< first score component
    float pnb = -INFINITY;   ///< second score component
    float prob = -INFINITY;  ///< overall score
};
|
||
class CTCdecode { | ||
private: | ||
float *ctc_weight; | ||
float *ctc_bias; | ||
|
||
public: | ||
CTCdecode(float *ctc_weight, float *ctc_bias); | ||
~CTCdecode(); | ||
void forward(Tensor<float> *din, deque<PathProb> &hyps); | ||
void show_hyps(deque<PathProb> hyps); | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#include "ConvModule.h" | ||
#include "util.h" | ||
#include <cblas.h> | ||
#include <math.h> | ||
#include <string.h> | ||
|
||
// Keeps the parameter pointer and builds the layer normalisation from
// params->norm with epsilon 1e-5.
// The scraped text showed "¶ms": the "&para" of "&params" had been
// decoded as an HTML entity. The address-of expression is restored here.
ConvModule::ConvModule(EncConvParams *params) : params(params)
{
    norm = new LayerNorm(&params->norm, 1e-5f);
}
|
||
// Releases the LayerNorm allocated with `new` in the constructor, which was
// previously leaked. `params` is only borrowed and is left untouched.
// NOTE(review): assumes ConvModule objects are never copied (a copy would
// share `norm` and delete it twice) - confirm against the class declaration.
ConvModule::~ConvModule()
{
    delete norm;
}
|
||
/**
 * Gated linear unit over rows of 1024 floats.
 *
 * Each input row is split into two halves of 512; output element j is
 * first_half[j] * sigmoid(second_half[j]).
 *
 * @param din   input, read as din->buff_size / 1024 rows of 1024 floats
 * @param dout  output, written as the same number of rows of 512 floats
 */
void glu(Tensor<float> *din, Tensor<float> *dout)
{
    const int rows = din->buff_size / 1024;
    for (int row = 0; row < rows; row++) {
        const int src_base = row * 1024;
        const int dst_base = row * 512;
        for (int col = 0; col < 512; col++) {
            float a = din->buff[src_base + col];       // value half
            float b = din->buff[src_base + col + 512]; // gate half
            dout->buff[dst_base + col] = a / (1 + exp(-b));
        }
    }
}
|
||
/**
 * Runs the convolution module in place on `din`:
 * pointwise conv 1 (512 -> 1024) -> GLU (1024 -> 512) -> depthwise conv with
 * a 15-tap kernel and 7 zeros of padding on each side -> layer norm -> swish
 * -> pointwise conv 2 (512 -> 512).
 *
 * @param din  tensor read as din->size[2] rows of 512 floats; overwritten
 *             with the result
 */
void ConvModule::forward(Tensor<float> *din)
{
    const int frames = din->size[2];
    const int dim = 512;      // channels per frame
    const int glu_dim = 1024; // channels after the first pointwise conv
    const int kernel = 15;    // depthwise kernel length
    const int pad = 7;        // zeros on each side: (kernel - 1) / 2

    // Pointwise conv 1: rows are pre-filled with the bias, then sgemm
    // accumulates din * W1^T on top (beta == 1).
    Tensor<float> tmp(frames, glu_dim);
    for (int t = 0; t < frames; ++t) {
        memcpy(tmp.buff + t * glu_dim, params->pointwise_conv1_bias,
               sizeof(float) * glu_dim);
    }
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, frames, glu_dim, dim,
                1, din->buff, dim, params->pointwise_conv1_weight, dim, 1,
                tmp.buff, glu_dim);
    glu(&tmp, din);

    // Depthwise conv, one channel at a time. conv_in holds the padded time
    // series of the current channel; only its middle part is rewritten per
    // channel, so the padding stays zero after the single zeros() call.
    Tensor<float> conv_in(1, frames + 2 * pad);
    Tensor<float> blasin(frames, kernel);
    conv_in.zeros();

    for (int ch = 0; ch < dim; ++ch) {
        for (int t = 0; t < frames; ++t) {
            const int idx = t * dim + ch;
            conv_in.buff[t + pad] = din->buff[idx];
            // Replace the consumed value by the bias: sgemm adds onto it.
            din->buff[idx] = params->depthwise_conv_bias[ch];
        }
        // Unroll the sliding windows into rows of `kernel` samples.
        for (int t = 0; t < frames; ++t) {
            memcpy(blasin.buff + t * kernel, conv_in.buff + t,
                   kernel * sizeof(float));
        }

        // Column `ch` of din (stride dim) += windows * kernel weights.
        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, frames, 1,
                    kernel, 1, blasin.buff, kernel,
                    params->depthwise_conv_weight + ch * kernel, 1, 1,
                    din->buff + ch, dim);
    }

    norm->forward(din);
    swish(din);

    // Pointwise conv 2: copy the activations, refill din with the bias and
    // accumulate copy * W2^T onto it.
    Tensor<float> tmp2(din);
    for (int t = 0; t < frames; ++t) {
        memcpy(din->buff + t * dim, params->pointwise_conv2_bias,
               sizeof(float) * dim);
    }
    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, frames, dim, dim, 1,
                tmp2.buff, dim, params->pointwise_conv2_weight, dim, 1,
                din->buff, dim);
}
Oops, something went wrong.