diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..16d5987 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +/data +/logs + +# Python +__pycache__/ +*.py[cod] + +# Virtual environment +venv/ +env/ +*.egg-info/ +dist/ + +# IDE files +.idea/ +.vscode/ + +# Compiled files +*.pyc +*.pyo +*.pyd + +# Temporary files +*.bak +*.swp +*~ + +# Logs +*.log + +# User-specific files +*.suo +*.user +*.sln.docstates + +# macOS +.DS_Store diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f475555 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +numpy==1.24.3 +scipy==1.11.2 +scikit-learn==1.3.0 +librosa==0.10.1 +tensorflow==2.13.0 +pyaudio==0.2.13 +matplotlib==3.7.0 \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..b2027cb --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,10 @@ +# __init__.py + +VERSION = "1.0.0" + +from . import capture_audio +from . import process_audio +from . import feature_extraction +from . 
def capture_audio_to_file(output_file, duration=5, sample_rate=44100, channels=1,
                          chunk_size=1024):
    """Record audio from the default input device and save it as a WAV file.

    Args:
        output_file: Path of the WAV file to write.
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz passed to the input stream and
            written to the WAV header.
        channels: Number of input channels to record.
        chunk_size: Number of frames read from the stream per call (also
            used as the stream's ``frames_per_buffer``).

    Returns:
        True if the recording was captured and written, False if any step
        inside the recording/writing phase raised (the error is printed and
        logged).
    """
    sample_format = pyaudio.paInt16
    audio = pyaudio.PyAudio()

    try:
        stream = audio.open(format=sample_format,
                            channels=channels,
                            rate=sample_rate,
                            input=True,
                            frames_per_buffer=chunk_size)

        try:
            print(f"Recording audio for {duration} seconds...")

            # Whole chunks only: a trailing partial chunk is dropped, so the
            # recording can be marginally shorter than `duration`.
            num_chunks = int(sample_rate / chunk_size * duration)
            frames = [stream.read(chunk_size) for _ in range(num_chunks)]

            print("Recording finished.")
        finally:
            # Always release the input device, even when a read fails.
            stream.stop_stream()
            stream.close()

        # write to output wav file
        with wave.open(output_file, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(audio.get_sample_size(sample_format))
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))

    except Exception as e:
        print(f"Error: {str(e)}")
        # logger.exception records the traceback alongside the message.
        logger.exception("Error: %s", e)
        return False
    finally:
        # Runs on both the success and the failure path.
        audio.terminate()

    return True
def process_audio_file(audio_file):
    """Load an audio file, compute its MFCCs and plot waveform and MFCCs.

    The file is loaded with ``librosa.load`` using its default arguments
    (NOTE(review): per the librosa docs this resamples to the library's
    default rate rather than keeping the file's native rate - confirm that
    is intended). 40 MFCC coefficients are computed and shown below the
    waveform in a two-row matplotlib figure; ``plt.show()`` is called to
    display the figure.

    Args:
        audio_file: Path of the audio file to load.

    Returns:
        True if the file was processed and displayed, False if any step
        raised (the error is printed and logged).
    """
    try:
        # Load the audio file
        y, sr = librosa.load(audio_file)

        # Extract audio features (e.g., MFCCs)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

        # Display the audio waveform and MFCCs
        plt.figure(figsize=(10, 6))
        plt.subplot(2, 1, 1)
        librosa.display.waveshow(y, sr=sr)
        plt.title('Audio Waveform')

        plt.subplot(2, 1, 2)
        # Pass sr explicitly so the time axis follows the rate actually
        # returned by load() instead of specshow's own default.
        librosa.display.specshow(mfccs, sr=sr, x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('MFCCs')

        plt.tight_layout()
        plt.show()

    except Exception as e:
        print(f"Error: {str(e)}")
        # logger.exception records the traceback alongside the message.
        logger.exception("Error: %s", e)
        return False

    return True