main.py

# -*- coding: utf-8 -*-
"""BERTT.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1QIlNsVXXwpMp8u-BAYEW3BdUEzLbcoge
"""

# Mount Google Drive at /content/drive; the spreadsheets and the audio file
# read further down are addressed through paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import pandas as pd

!pip install simpletransformers

from simpletransformers.classification import ClassificationModel

# Load the labelled corpus. The code below reads two columns from it:
# "kategori" (class name) and "metin" (document text).
a = pd.read_excel("/content/drive/MyDrive/Bert/Projem.xlsx")

# Notebook cell outputs: preview the frame and the distinct category names.
a

a["kategori"].unique()

# pd.factorize numbers the categories in order of first appearance in the
# sheet, so any code that turns a label id back into a category name has to
# follow that same order.
a['labels'] = pd.factorize(a.kategori)[0]

from sklearn.model_selection import train_test_split

# Fixed random_state keeps the 80/20 split reproducible between runs.
train, test = train_test_split(a, test_size=0.2, random_state=42)

# .copy() gives each split its own frame, so the column assignments below do
# not write into a slice of `a` (which triggers SettingWithCopyWarning).
train = train[["metin", "labels"]].copy()
test = test[["metin", "labels"]].copy()

# The classifier expects text as str and labels as int. Coerce BOTH splits:
# previously only `train` was converted, leaving `test` with whatever types
# Excel produced (e.g. numeric or empty cells in the text column).
train["metin"] = train["metin"].apply(str)
train["labels"] = train["labels"].astype(int)
test["metin"] = test["metin"].apply(str)
test["labels"] = test["labels"].astype(int)

# Training configuration handed to simpletransformers; kept in one place so
# the constructor call below stays readable.
model_args = {
    'reprocess_input_data': True,
    'overwrite_output_dir': True,
    'num_train_epochs': 3,
    "train_batch_size": 64,
    "fp16": False,
    "output_dir": "bert_model",
}

# Five-class BERT classifier built from the Turkish uncased checkpoint,
# with CUDA disabled.
model = ClassificationModel(
    'bert',
    'dbmdz/bert-base-turkish-uncased',
    num_labels=5,
    use_cuda=False,
    args=model_args,
)

model.train_model(train)

# Evaluate on the held-out split; model_outputs holds one row of per-class
# scores for each test document.
result, model_outputs, wrong_predictions = model.eval_model(test)

# Predicted class = index of the highest score in each row.
predictions = np.argmax(model_outputs, axis=1)
actuals = test["labels"].to_numpy()

# Notebook cell outputs: eyeball the first few predictions against the truth.
predictions[:10]

actuals[:10]

from sklearn.metrics import accuracy_score
accuracy_score(actuals, predictions)

# Pick one held-out document and show it.
örnek = test.iloc[43]['metin']
print(örnek)

# Classify the sample that was just printed. The previous code passed `dene`,
# which is not defined until later in the script (NameError on a clean
# top-to-bottom run) and is a different text from the one shown above.
tahmin = model.predict([örnek])

# predict() returns (predictions, raw_outputs); take the first (and only)
# prediction as a plain int instead of comparing the whole container to a
# number.
label = int(tahmin[0][0])

# NOTE(review): this id -> name table assumes the label ids produced by
# pd.factorize on the training sheet follow this order; confirm against the
# data. Any id outside 0-3 is reported as "SPOR", as before.
category_names = {
    0: "BİLİM VE TEKNOLOJİ",
    1: "EKONOMİ",
    2: "SAĞLIK",
    3: "SİYASET",
}
print(category_names.get(label, "SPOR"))

# Load the trial spreadsheet; its columns are renamed to kategori / metin.
denemem=pd.read_excel("/content/drive/MyDrive/Bert/denemem.xlsx",names=["kategori","metin"])

# Notebook cell output: preview the trial data.
denemem

# Two sample texts taken from the trial sheet by row position.
dene=denemem.iloc[3]["metin"]

örnek=denemem.iloc[11]["metin"]

tahmin=model.predict([örnek])

# predict() returns (predictions, raw_outputs). Index the single prediction
# explicitly: comparing the whole predictions container to an int only works
# while it is a one-element numpy array, and would always fall through to
# "SPOR" if it were a list.
label = int(tahmin[0][0])

# NOTE(review): this id -> name table assumes the label ids produced by
# pd.factorize on the training sheet follow this order; confirm against the
# data. Any id outside 0-3 is reported as "SPOR", as before.
category_names = {
    0: "BİLİM VE TEKNOLOJİ",
    1: "EKONOMİ",
    2: "SAĞLIK",
    3: "SİYASET",
}
print(category_names.get(label, "SPOR"))

# Classification from an audio recording

import speech_recognition as sr

recognizer = sr.Recognizer()

# Read the recording. record() consumes the file until the audio ends,
# whereas listen() captures a single phrase and can stop at the first pause.
with sr.AudioFile("/content/drive/MyDrive/Bert/Bilim Ve Teknoloji.wav") as source:
    recorded_audio = recognizer.record(source)
    print("Done recording")

# Transcribe the audio (Turkish) and classify the transcript only when
# transcription succeeded. Previously a failed recognition left `text`
# unbound and the predict call after the handler raised NameError, hiding the
# real error.
try:
    print("Recognizing the text")
    text = recognizer.recognize_google(
        recorded_audio,
        language='tr-tr',
    )
except (sr.UnknownValueError, sr.RequestError) as ex:
    # UnknownValueError: speech was unintelligible;
    # RequestError: the recognition service could not be reached or refused.
    print(f"Speech recognition failed: {ex!r}")
else:
    c = model.predict([text])

    # predict() returns (predictions, raw_outputs); take the single
    # prediction as a plain int.
    label = int(c[0][0])

    # NOTE(review): this id -> name table assumes the label ids produced by
    # pd.factorize on the training sheet follow this order; confirm against
    # the data. Any id outside 0-3 is reported as "SPOR", as before.
    category_names = {
        0: "BİLİM VE TEKNOLOJİ",
        1: "EKONOMİ",
        2: "SAĞLIK",
        3: "SİYASET",
    }
    print(category_names.get(label, "SPOR"))