初始化仓库

chenkui164 · Jul 8, 2022 · 5637dad · 5637dad
1 parent 0cd0001
commit 5637dad
Show file tree

Hide file tree

Showing 42 changed files with 8,261 additions and 2 deletions.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,49 @@
+# Build script for the fastasr binary: every .cpp found under $(SRC_PATH)
+# (at most three directory levels deep) is compiled into obj/ and the
+# resulting objects are linked into ./fastasr.
+SRC_PATH = ./src
+DIRS = $(shell find $(SRC_PATH) -maxdepth 3 -type d)
+SRCS_CXX += $(foreach dir, $(DIRS), $(wildcard $(dir)/*.cpp))
+VPATH = $(DIRS)
+OBJECTS=$(addprefix obj/,$(notdir $(SRCS_CXX:.cpp=.o)))
+# OBJECTS=$(SRCS_C:.c=.o)
+DEPENDS=$(OBJECTS:.o=.d)
+CXXFLAGS+=-O3 -std=c++11 
+# CXXFLAGS+=-Wall -fdiagnostics-color=auto -std=c++11 -fext-numeric-literals -O2 -pthread -fopenmp -Wl,-rpath-link=/home/ck/work/am_msk/libs/lib
+# CXXFLAGS+=-Wall -fdiagnostics-color=auto -std=c++11 -fext-numeric-literals -O2 -pthread -fopenmp -Wl,-rpath-link=/home/ck/work/am_msk/libs/lib
+
+ARM_CXX=/home/ck/work/am_msk/cross-pi-gcc-8.3.0-2/bin/arm-linux-gnueabihf-g++
+PC_CXX=g++
+
+# Switch to $(ARM_CXX) to cross-compile.
+CXX=$(PC_CXX)
+
+LDLIBS=-lm -lfftw3f -lopenblas
+
+# LDPATH+=-L/home/ck/work/am_msk/libs/lib
+
+# INCLUDEPATH+=-I /home/ck/work/am_msk/libs/include
+
+
+# test:
+# 	$(OBJECTS)
+
+
+fastasr : $(OBJECTS)
+	$(CXX) -o fastasr $(CXXFLAGS) $(OBJECTS) $(LDLIBS) $(LDPATH)
+
+# The output directory is created on demand so a fresh checkout builds.
+obj/%.o:%.cpp
+	@mkdir -p $(@D)
+	$(CXX) -c -o $@ $< $(CXXFLAGS) $(INCLUDEPATH) $(PREDEFINE)
+
+# Generate one dependency file per source. The scan must use the same
+# compiler, language standard, include paths and defines as the real
+# compilation, otherwise the header list can differ from what is compiled.
+# The sed step rewrites "foo.o:" into "obj/foo.o obj/foo.d :" so the .d file
+# is refreshed whenever one of the headers changes.
+obj/%.d:%.cpp
+	@set -e; mkdir -p $(@D); rm -f $@; \
+		$(CXX) -MM $< $(CXXFLAGS) $(INCLUDEPATH) $(PREDEFINE) > $@.$$$$; \
+		sed 's,\($*\)\.o[ :]*,obj/\1.o $@ : ,g' < $@.$$$$ > $@; \
+		rm -f $@.$$$$
+
+# Skip dependency generation when the only thing requested is a clean-up.
+ifeq ($(filter clean,$(MAKECMDGOALS)),)
+include $(DEPENDS)
+endif
+
+
+
+.PHONY : clean install
+
+install : fastasr
+	expect ./download.ext
+
+clean:
+	@rm -f obj/*
+	@rm -f fastasr
diff --git a/README.md b/README.md
@@ -1,2 +1 @@
-# FastASR
-基于PaddleSpeech所使用的conformer模型，使用C++的高效实现模型推理，在树莓派4B等ARM平台运行也可流畅运行。
+hello
diff --git a/src/Audio.cpp b/src/Audio.cpp
diff --git a/src/Audio.h b/src/Audio.h
@@ -0,0 +1,27 @@
+
+#ifndef AUDIO_H
+#define AUDIO_H
+
+
+#include <stdint.h>
+
+#include "Tensor.h"
+
+// Audio is constructed from a file name and exposes a filter-bank feature
+// tensor through the public `fbank_feature` member. Only the declaration is
+// visible here; the notes below describe what the declaration itself shows.
+class Audio {
+  private:
+    int16_t *speech;     // 16-bit integer buffer (presumably PCM samples -- TODO confirm in Audio.cpp)
+    int speech_len;      // presumably the element count of `speech` -- TODO confirm in Audio.cpp
+    int16_t sample_rate; // NOTE(review): int16_t tops out at 32767, so a rate such as 44100 cannot be stored
+
+    void loadwav(const char *filename);     // takes the file name handed to the constructor (presumably)
+    void audio2feature();
+    void melspect(float *din, float *dout); // din is the input buffer, dout the output buffer
+    void global_cmvn(float *din);           // takes a single buffer; presumably normalises it in place -- TODO confirm
+
+  public:
+    Tensor<float> *fbank_feature; // raw pointer; NOTE(review): ownership is not visible from this header
+    // NOTE(review): the class declares a destructor but no copy constructor or
+    // copy assignment, while holding raw pointers -- confirm it is never copied.
+    Audio(const char *filename);
+    ~Audio();
+};
+
+#endif
diff --git a/src/CTCDecode.cpp b/src/CTCDecode.cpp
@@ -0,0 +1,196 @@
+#include "CTCDecode.h"
+#include "util.h"
+#include <cblas.h>
+#include <cmath>
+#include <iostream>
+#include <map>
+#include <set>
+#include <string.h>
+using namespace std;
+
+#define vocab_size 5537
+
+// Remembers the two parameter buffers used by forward(). The pointers are
+// stored as-is; nothing is copied here.
+CTCdecode::CTCdecode(float *ctc_weight, float *ctc_bias)
+    : ctc_weight{ctc_weight}, ctc_bias{ctc_bias}
+{
+}
+
+// Nothing to release: the destructor leaves the parameter buffers untouched.
+CTCdecode::~CTCdecode() {}
+
+/**
+ * Returns log(exp(din[0]) + ... + exp(din[len - 1])).
+ *
+ * The sum is evaluated relative to the largest input so that very negative
+ * log-probabilities (for example accumulated path scores below about -87,
+ * where a float exp() underflows to zero) do not collapse to -inf.
+ *
+ * @param din  array of `len` log-domain values
+ * @param len  number of values to combine
+ * @return the log of the summed exponentials; -inf when `len` is 0 or every
+ *         input is -inf
+ */
+float log_add(float *din, int len)
+{
+    float peak = -INFINITY;
+    for (int i = 0; i < len; i++) {
+        if (din[i] > peak) {
+            peak = din[i];
+        }
+    }
+
+    // Shifting by an infinite peak would evaluate inf - inf = NaN, so in that
+    // case fall back to the unshifted formula, which is exact there.
+    const float shift = std::isinf(peak) ? 0.0f : peak;
+
+    float sum = 0;
+    for (int i = 0; i < len; i++) {
+        sum += std::exp(din[i] - shift);
+    }
+    return shift + std::log(sum);
+}
+
+// Orderings for the std::set containers used below: ascending by score, so
+// begin() is always the weakest entry. A set treats two elements that compare
+// equivalent as duplicates, so ties are broken on the token index / prefix to
+// keep distinct entries that happen to share a score. Arguments are taken by
+// const reference so that comparing PathProb does not copy its prefix vector.
+auto char_cmp = [](const CharProb &a, const CharProb &b) -> bool {
+    if (a.prob != b.prob) {
+        return a.prob < b.prob;
+    }
+    return a.char_idx < b.char_idx;
+};
+auto path_cmp = [](const PathProb &a, const PathProb &b) -> bool {
+    if (a.prob != b.prob) {
+        return a.prob < b.prob;
+    }
+    return a.prefix < b.prefix;
+};
+
+/**
+ * Collects the `k` largest values of din[0..len) into `s`.
+ *
+ * Each kept entry records the position (char_idx) and the value (prob).
+ * Because `s` is ordered ascending, s.begin() is the weakest kept entry and
+ * is the one evicted when a better candidate arrives.
+ *
+ * @param din  input scores
+ * @param len  number of scores in `din`; may be smaller than `k`
+ * @param s    destination set, expected to be empty on entry
+ * @param k    maximum number of entries to keep (defaults to 10)
+ */
+void topk(float *din, int len, set<CharProb, decltype(char_cmp)> &s, int k = 10)
+{
+    if (k <= 0) {
+        return;
+    }
+
+    for (int i = 0; i < len; i++) {
+        const bool full = static_cast<int>(s.size()) >= k;
+        // Once full, only candidates that beat the current minimum matter.
+        if (full && !(din[i] > s.begin()->prob)) {
+            continue;
+        }
+
+        CharProb candidate;
+        candidate.char_idx = i;
+        candidate.prob = din[i];
+        s.insert(candidate);
+
+        // Evict only after a successful insert pushed the set over capacity;
+        // a rejected insert must not shrink the set.
+        if (static_cast<int>(s.size()) > k) {
+            s.erase(s.begin());
+        }
+    }
+}
+
+/**
+ * Prefix beam search over per-frame log scores.
+ *
+ * For every frame the best-scoring tokens (selected by topk) are combined
+ * with every hypothesis kept from the previous frame. Each hypothesis tracks
+ * two log scores: `pb`, updated when token 0 is consumed, and `pnb`, updated
+ * for any other token (token 0 is presumably the CTC blank -- TODO confirm
+ * against the vocabulary). After each frame only the `beam_size` hypotheses
+ * with the highest combined score log_add(pb, pnb) survive.
+ *
+ * @param din   scores laid out as consecutive rows of `vocab_size` floats;
+ *              the number of rows is read from din->size[2]
+ * @param hyps  receives the surviving hypotheses, best first
+ */
+void ctc_beam_search(Tensor<float> *din, deque<PathProb> &hyps)
+{
+    const int tmax = din->size[2];
+    const int beam_size = 10;
+
+    // Start from the empty prefix with pb = 0 (pnb keeps its -inf default).
+    set<PathProb, decltype(path_cmp)> curr_hyps_set(path_cmp);
+    PathProb initial;
+    initial.pb = 0;
+    initial.prob = 0;
+    curr_hyps_set.insert(initial);
+
+    for (int t = 0; t < tmax; t++) {
+        set<CharProb, decltype(char_cmp)> char_set(char_cmp);
+        topk(din->buff + t * vocab_size, vocab_size, char_set);
+
+        // Candidate hypotheses for the next frame, keyed by prefix so that
+        // different ways of reaching the same prefix are merged.
+        map<vector<int>, PathProb> next_hyps_map;
+        for (const CharProb &token : char_set) {
+            const int char_idx = token.char_idx;
+            const float char_prob = token.prob;
+
+            for (const PathProb &hyp : curr_hyps_set) {
+                const int last = hyp.prefix.empty() ? -1 : hyp.prefix.back();
+
+                if (char_idx == 0) {
+                    // Token 0 leaves the prefix unchanged and feeds pb.
+                    PathProb &same = next_hyps_map[hyp.prefix];
+                    float terms[] = {same.pb, hyp.pb + char_prob,
+                                     hyp.pnb + char_prob};
+                    same.pb = log_add(terms, 3);
+                    same.prefix = hyp.prefix;
+                    continue;
+                }
+
+                vector<int> next_prefix(hyp.prefix);
+                next_prefix.push_back(char_idx);
+
+                if (last == char_idx) {
+                    // Repeating the last token without a token 0 in between
+                    // keeps the prefix as it is ...
+                    PathProb &same = next_hyps_map[hyp.prefix];
+                    float stay[] = {same.pnb, hyp.pnb + char_prob};
+                    same.pnb = log_add(stay, 2);
+                    same.prefix = hyp.prefix;
+
+                    // ... while a repeat that follows token 0 extends it.
+                    PathProb &grown = next_hyps_map[next_prefix];
+                    float grow[] = {grown.pnb, hyp.pb + char_prob};
+                    grown.pnb = log_add(grow, 2);
+                    grown.prefix = next_prefix;
+                } else {
+                    // A new token always extends the prefix.
+                    PathProb &grown = next_hyps_map[next_prefix];
+                    float terms[] = {grown.pnb, hyp.pb + char_prob,
+                                     hyp.pnb + char_prob};
+                    grown.pnb = log_add(terms, 3);
+                    grown.prefix = next_prefix;
+                }
+            }
+        }
+
+        // Prune: keep the beam_size candidates with the highest total score.
+        curr_hyps_set.clear();
+        for (auto &entry : next_hyps_map) {
+            PathProb &candidate = entry.second;
+            float both[] = {candidate.pb, candidate.pnb};
+            candidate.prob = log_add(both, 2);
+
+            const bool full =
+                static_cast<int>(curr_hyps_set.size()) >= beam_size;
+            if (full && !(curr_hyps_set.begin()->prob < candidate.prob)) {
+                continue;
+            }
+
+            curr_hyps_set.insert(candidate);
+            // Evict the weakest entry only when the insert actually pushed
+            // the beam over capacity, so the beam never shrinks by accident.
+            if (static_cast<int>(curr_hyps_set.size()) > beam_size) {
+                curr_hyps_set.erase(curr_hyps_set.begin());
+            }
+        }
+    }
+
+    // The set iterates from worst to best; push_front reverses that order.
+    for (const PathProb &hyp : curr_hyps_set) {
+        hyps.push_front(hyp);
+    }
+}
+
+/**
+ * Debug helper: prints every hypothesis (prefix length, prefix tokens and
+ * the pb / pnb / prob scores) to stdout.
+ *
+ * An empty prefix is printed as "[]" instead of reading a non-existent
+ * first element.
+ *
+ * @param hyps  hypotheses to print, in the order given
+ */
+void CTCdecode::show_hyps(deque<PathProb> hyps)
+{
+    for (const PathProb &hyp : hyps) {
+        const int mm = hyp.prefix.size();
+        printf("prefix len is %d, val is [", mm);
+        for (int i = 0; i < mm; i++) {
+            // Comma after every token except the last one.
+            printf(i + 1 < mm ? "%d," : "%d", hyp.prefix[i]);
+        }
+        printf("]\n");
+        printf("pb is %f, pnb is %f, prob is %f\n", hyp.pb, hyp.pnb,
+               hyp.prob);
+    }
+}
+
+/**
+ * Projects the input rows onto the vocabulary, applies log_softmax to each
+ * row and runs the beam search on the result.
+ *
+ * @param din   input read as (din->size[2] x 512) row-major floats
+ * @param hyps  receives the hypotheses produced by ctc_beam_search
+ */
+void CTCdecode::forward(Tensor<float> *din, deque<PathProb> &hyps)
+{
+    const int hidden_dim = 512;
+    const int frames = din->size[2];
+
+    // Pre-load every output row with the bias; the GEMM below runs with
+    // beta = 1 and therefore accumulates on top of it.
+    Tensor<float> logits(frames, vocab_size);
+    for (int t = 0; t < frames; t++) {
+        memcpy(logits.buff + t * vocab_size, ctc_bias,
+               sizeof(float) * vocab_size);
+    }
+
+    // logits += din (frames x 512) * ctc_weight (512 x vocab_size)
+    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, frames, vocab_size,
+                hidden_dim, 1, din->buff, hidden_dim, ctc_weight, vocab_size,
+                1, logits.buff, vocab_size);
+
+    for (int t = 0; t < frames; t++) {
+        log_softmax(logits.buff + t * vocab_size, vocab_size);
+    }
+
+    ctc_beam_search(&logits, hyps);
+}
diff --git a/src/CTCDecode.h b/src/CTCDecode.h
@@ -0,0 +1,39 @@
+
+#ifndef CTCDECODE_H
+#define CTCDECODE_H
+
+#include <deque>
+#include <math.h>
+#include <stdint.h>
+#include <vector>
+
+#include "Tensor.h"
+#include "WenetParams.h"
+
+using namespace std;
+
+// One scored token: pairs a position in a score row with the score found
+// there. Members carry no default initialisers.
+struct CharProb {
+    int char_idx; // index of the token within the score row
+    float prob;   // score stored at that index
+};
+
+// One beam-search hypothesis. All three scores default to -INFINITY.
+struct PathProb {
+    vector<int> prefix;     // token indices emitted so far
+    float pb = -INFINITY;   // score updated when token 0 is consumed (presumably the CTC blank -- TODO confirm)
+    float pnb = -INFINITY;  // score updated when any other token is consumed
+    float prob = -INFINITY; // combined score; the beam search sets it to log_add of pb and pnb
+};
+
+// Output projection followed by a beam search; see forward().
+class CTCdecode {
+  private:
+    float *ctc_weight; // projection weights, stored as passed to the constructor
+    float *ctc_bias;   // projection bias, stored as passed to the constructor
+
+  public:
+    // Keeps the two pointers as given.
+    CTCdecode(float *ctc_weight, float *ctc_bias);
+    ~CTCdecode();
+    // Decodes `din` and writes the resulting hypotheses into `hyps`.
+    void forward(Tensor<float> *din, deque<PathProb> &hyps);
+    // Prints the given hypotheses; takes the deque by value (a copy is made).
+    void show_hyps(deque<PathProb> hyps);
+};
+
+#endif
diff --git a/src/ConvModule.cpp b/src/ConvModule.cpp
@@ -0,0 +1,80 @@
+#include "ConvModule.h"
+#include "util.h"
+#include <cblas.h>
+#include <math.h>
+#include <string.h>
+
+// Keeps the parameter block and builds the layer-norm stage from its `norm`
+// member, using an epsilon of 1e-5.
+ConvModule::ConvModule(EncConvParams *params) : params(params)
+{
+    this->norm = new LayerNorm(&this->params->norm, 1e-5f);
+}
+
+// Releases the LayerNorm instance allocated with `new` in the constructor;
+// without this every ConvModule leaks it. `params` is not freed here.
+// NOTE(review): ConvModule must not be copied, otherwise `norm` would be
+// deleted twice -- confirm no caller copies it.
+ConvModule::~ConvModule()
+{
+    delete norm;
+}
+
+/**
+ * Gated linear unit over rows of 1024 floats: the first 512 values of a row
+ * are multiplied by the logistic sigmoid of the last 512, giving 512 outputs
+ * per row.
+ *
+ * @param din   input, din->buff_size / 1024 rows of 1024 floats
+ * @param dout  output, the same number of rows of 512 floats
+ */
+void glu(Tensor<float> *din, Tensor<float> *dout)
+{
+    const int half = 512;
+    const int rows = din->buff_size / 1024;
+
+    for (int r = 0; r < rows; r++) {
+        const int in_base = r * 1024;
+        const int out_base = r * half;
+        for (int c = 0; c < half; c++) {
+            const float value = din->buff[in_base + c];
+            const float gate = din->buff[in_base + c + half];
+            // value * sigmoid(gate)
+            dout->buff[out_base + c] = value / (1 + exp(-gate));
+        }
+    }
+}
+
+/**
+ * Runs the convolution module in place on `din`:
+ *   1. pointwise projection 512 -> 1024, followed by glu back to 512,
+ *   2. per-channel convolution with a 15-tap kernel and 7 zeros of padding
+ *      on each side,
+ *   3. layer norm and swish,
+ *   4. pointwise projection 512 -> 512.
+ *
+ * @param din  input/output, read as (din->size[2] x 512) row-major floats
+ */
+void ConvModule::forward(Tensor<float> *din)
+{
+    const int channels = 512;
+    const int expanded = 1024;
+    const int kernel = 15;
+    const int pad = 7;
+    const int frames = din->size[2];
+
+    // Step 1: rows start out as the bias, the GEMM (beta = 1) adds
+    // din * weight^T on top.
+    Tensor<float> projected(frames, expanded);
+    for (int t = 0; t < frames; t++) {
+        memcpy(projected.buff + t * expanded, params->pointwise_conv1_bias,
+               sizeof(float) * expanded);
+    }
+    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, frames, expanded,
+                channels, 1, din->buff, channels,
+                params->pointwise_conv1_weight, channels, 1, projected.buff,
+                expanded);
+    glu(&projected, din);
+
+    // Step 2: one channel at a time.
+    Tensor<float> padded(1, frames + 2 * pad);
+    Tensor<float> windows(frames, kernel);
+    padded.zeros();
+
+    for (int c = 0; c < channels; c++) {
+        // Move the channel into the padded line and seed the output column
+        // with the bias so the GEMM can accumulate onto it.
+        for (int t = 0; t < frames; t++) {
+            const int cell = t * channels + c;
+            padded.buff[t + pad] = din->buff[cell];
+            din->buff[cell] = params->depthwise_conv_bias[c];
+        }
+        // Unroll the sliding windows into a (frames x kernel) matrix.
+        for (int t = 0; t < frames; t++) {
+            memcpy(windows.buff + t * kernel, padded.buff + t,
+                   kernel * sizeof(float));
+        }
+        // Output column c (stride 512) += windows * kernel weights of c.
+        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, frames, 1,
+                    kernel, 1, windows.buff, kernel,
+                    params->depthwise_conv_weight + c * kernel, 1, 1,
+                    din->buff + c, channels);
+    }
+
+    // Step 3
+    norm->forward(din);
+    swish(din);
+
+    // Step 4: snapshot the activations, reset din to the bias, accumulate.
+    Tensor<float> activated(din);
+    for (int t = 0; t < frames; t++) {
+        memcpy(din->buff + t * channels, params->pointwise_conv2_bias,
+               sizeof(float) * channels);
+    }
+    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, frames, channels,
+                channels, 1, activated.buff, channels,
+                params->pointwise_conv2_weight, channels, 1, din->buff,
+                channels);
+}