Skip to content

Commit

Permalink
fix example script and update config example and readme
Browse files Browse the repository at this point in the history
  • Loading branch information
martianbug committed Oct 2, 2024
1 parent c69b521 commit 3048288
Show file tree
Hide file tree
Showing 5 changed files with 24,208 additions and 19 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ docs/source/musif.log.2023-01-31
docs/source/.ipynb_checkpoints
dist
.pdm-python
musif.log
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ For jSymbolic features, musif currently does not support the integration of thes
Java JRE >= 8 must be installed in your OS. Download jSymbolic from https://sourceforge.net/projects/jmir/files/jSymbolic/

*Important*: right now music21 features are NOT guaranteed to be compatible with musif's cache system. musif's native features work with the cache system just fine.


## Example
Check and run run_extraction_example.py to see an initial script for extracting features from xml files using musif.

## Testing and features extraction
Apart from the documentation of musif, where tutorials and example code can be found, please feel free to clone and check this repository, where musif is used to extract features from different corpora:
https://github.com/DIDONEproject/music_symbolic_features
Expand Down
9 changes: 4 additions & 5 deletions config_extraction_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ expand_repeats: false
window_size: null

# Size of overlap (in measures) between each window. Considered only if `window_size` is
# null
# not null. It must be less than window_size - 1!
overlap: 2

# Number of parallel processes to be used as defined by joblib:
Expand All @@ -64,9 +64,8 @@ log:
# in errors when extracting features.
# Dependencies may be expressed in the `musif_dependencies` property of the feature, see
# for instance `musif.extract.features.density.__init__.py`
basic_modules: []
# Possible values:
# - scoring
basic_modules: ['scoring']
# Other values: file_name_generic

# Modules to be extracted from scores. Order is important, so last ones will be
# processed last. Some of them might depend on others.
Expand Down Expand Up @@ -111,7 +110,7 @@ remove_unpitched_objects: true
# Filter to select only some instruments to be processed for each score.
# If the filter is None (disabling all parts), the program will extract all parts.
parts_filter: []
# Possible Values
# Possible values. If none is selected, all parts will be computed.
# - vnI
# - vnII
# - obI
Expand Down
20 changes: 7 additions & 13 deletions run_extraction_example.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
import sys

from musif.config import GenericConfiguration
from musif.extract.features.core.constants import FILE_NAME
import os
from pathlib import Path

import pandas as pd
from feature_extraction.custom_conf import CustomConf
from musif.extract.extract import FeaturesExtractor

from musiF.musif.process.processor import DataProcessor
from musif.process.processor import DataProcessor

# MAIN FILE to run extractions of data by Didone Project.

# directory containing xml files
data_dir = Path("data") / "xml"
data_dir = Path("xml_example") / "example.xml"
DEST_PATH = "destination_path"


# directory containing .pkl files in case of previous extractions for cache
cache_dir = None

# csv file containing files which raised error and need to be reextracted
path_error = 'martiser/error_files.csv'
errored_files = list(pd.read_csv(path_error, low_memory=False)[FILE_NAME])
# path_error = 'error_files.csv'
# errored_files = list(pd.read_csv(path_error, low_memory=False)[FILE_NAME])

# In case a partial extraction has been run, set here the previous df to avoid re-extracting these files.
# prev_path = str(prefix / NAME) + '.csv'
Expand All @@ -33,7 +32,7 @@
# limit_files = xml_files[0:len(xml_files)//4]

extracted_df = FeaturesExtractor(
CustomConf("config_extraction_example.yml"),
GenericConfiguration("config_extraction_example.yml"),
data_dir = str(data_dir),
# musescore_dir = Path("data") / "musescore", #only for harmonic analysis
# exclude_files = exclude_files,
Expand All @@ -48,9 +47,4 @@
p = DataProcessor(str(DEST_PATH) + '.csv', "config_postprocess_example.yml")
p.process()

p.data.drop('level_0', axis='columns')
p.save(str(DEST_PATH))
final_name = f'{DEST_PATH}'+'_alldata'+'.csv'

# Running tests to ensure features values make sense
os.system(f'python tests/test_of_test.py {final_name}')
p.save(str(DEST_PATH))
Loading

0 comments on commit 3048288

Please sign in to comment.