Commit e9d38b88 authored by Olivier REYNET's avatar Olivier REYNET
Browse files

Start

parent 0481f09d
File added
DATASETS/Music
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeRunConfigurationManager" shouldGenerate="true" shouldDeleteObsolete="true">
<generated>
<config projectName="EML" targetName="EXTRACTION" />
</generated>
</component>
<component name="CMakeSettings" AUTO_RELOAD="true">
<configurations>
<configuration PROFILE_NAME="Debug" ENABLED="true" CONFIG_NAME="Debug" GENERATION_OPTIONS="-DCMAKE_C_COMPILER=/usr/local/Cellar/gcc/11.2.0/bin/gcc-11 -DCMAKE_CXX_COMPILER=/usr/local/Cellar/gcc/11.2.0/bin/g++-11" />
</configurations>
</component>
<component name="ChangeListManager">
<list default="true" id="26383937-935f-4956-8f2f-11bfcd08a878" name="Changes" comment="">
<change afterPath="$PROJECT_DIR$/ANN/CMakeLists.txt" afterDir="false" />
<change afterPath="$PROJECT_DIR$/CART/CMakeLists.txt" afterDir="false" />
<change afterPath="$PROJECT_DIR$/RF/CMakeLists.txt" afterDir="false" />
<change afterPath="$PROJECT_DIR$/SVM/CMakeLists.txt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ClangdSettings">
<option name="formatViaClangd" value="false" />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="CMakeBuildProfile:Debug" />
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="CMakeLists.txt" />
</list>
</option>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="GitSEFilterConfiguration">
<file-type-list>
<filtered-out-file-type name="LOCAL_BRANCH" />
<filtered-out-file-type name="REMOTE_BRANCH" />
<filtered-out-file-type name="TAG" />
<filtered-out-file-type name="COMMIT_BY_MESSAGE" />
</file-type-list>
</component>
<component name="ProjectId" id="20wjA8rRmpiAFbejjKBEgjVGzXN" />
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="RunOnceActivity.cidr.known.project.marker" value="true" />
<property name="WebServerToolWindowFactoryState" value="true" />
<property name="cf.first.check.clang-format" value="false" />
<property name="cidr.known.project.marker" value="true" />
<property name="cmake.loaded.for.project" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/Helpers" />
<property name="settings.editor.selected.configurable" value="configurable.group.build" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$/Helpers" />
</key>
</component>
<component name="RunManager">
<configuration name="EXTRACTION" type="CMakeRunConfiguration" factoryName="Application" REDIRECT_INPUT="false" ELEVATE="false" USE_EXTERNAL_CONSOLE="false" PASS_PARENT_ENVS_2="true" PROJECT_NAME="EML" TARGET_NAME="EXTRACTION" CONFIG_NAME="Debug" RUN_TARGET_PROJECT_NAME="EML" RUN_TARGET_NAME="EXTRACTION">
<method v="2">
<option name="com.jetbrains.cidr.execution.CidrBuildBeforeRunTaskProvider$BuildBeforeRunTask" enabled="true" />
</method>
</configuration>
<configuration default="true" type="GradleAppRunConfiguration" factoryName="Application" REDIRECT_INPUT="false" ELEVATE="false" USE_EXTERNAL_CONSOLE="false" PASS_PARENT_ENVS_2="true">
<method v="2">
<option name="com.jetbrains.cidr.cpp.gradle.execution.GradleNativeBuildBeforeRunTaskProvider$BuildBeforeRunTask" enabled="true" />
</method>
</configuration>
</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="26383937-935f-4956-8f2f-11bfcd08a878" name="Changes" comment="" />
<created>1636962397683</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1636962397683</updated>
<workItem from="1636962401533" duration="34000" />
<workItem from="1636962457254" duration="1024000" />
<workItem from="1636963901423" duration="95000" />
<workItem from="1636964317616" duration="175000" />
<workItem from="1636964520206" duration="4959000" />
</task>
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="3" />
</component>
</project>
\ No newline at end of file
File added
# Root build script of the EML project.
cmake_minimum_required(VERSION 3.18)
project(EML)
# Request the C++20 standard for every target declared below.
set(CMAKE_CXX_STANDARD 20)
# Append optimisation (-O3) and warning flags to whatever flags are already set.
# NOTE(review): -std=c++20 repeats the standard requested above, and -O3 is applied
# to every build configuration — confirm both are intended.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -std=c++20")
# Only the Extraction sub-directory is part of the build for now.
add_subdirectory(Extraction)
# The remaining sub-directories are disabled (commented out).
#add_subdirectory(CART)
#add_subdirectory(RF)
#add_subdirectory(SVM)
#add_subdirectory(ANN)
This diff is collapsed.
This diff is collapsed.
add_executable(EXTRACTION fearures_extraction.cpp main.cpp)
\ No newline at end of file
#include <numeric>
#include <fstream>
#include "features_extraction.h"
#include "../Helpers/globals.h"
#include "../Helpers/signal.h"
#include "../Helpers/au_reading.h"
/**
 * Builds the spectral feature set of one signal.
 *
 * The transform itself is still to be written: for now both feature vectors
 * are zero-filled, FFT_SIZE elements long, and `signal` is never read.
 *
 * @param signal samples of the signal to analyse (currently unused)
 * @return map with the keys FTYPE::BINAVG and FTYPE::BINSTDEV
 */
std::map<FTYPE, DataVector> stft(DataVector &signal) {
    //TODO compute STFT with the help of signal.h
    DataVector bin_mean(FFT_SIZE);
    DataVector bin_deviation(FFT_SIZE);
    //TODO compute bins average and stddev

    // Store one vector per feature type.
    std::map<FTYPE, DataVector> features;
    features[FTYPE::BINAVG] = bin_mean;
    features[FTYPE::BINSTDEV] = bin_deviation;
    return features;
}
void write_csv(std::string filename,
std::vector<std::pair<std::filesystem::path, std::map<FTYPE, DataVector>>> &dataset) {
std::ofstream myFile(filename);
auto datah = dataset[0];
auto ith = datah.second.begin();
std::map<FTYPE, std::string> type_names{{FTYPE::SPECCENT, "SPECCENT"},
{FTYPE::BINAVG, "BINAVG"},
{FTYPE::BINSTDEV, "BINSTDEV"}};
while (ith != datah.second.end()) {
auto type_name = type_names[ith->first];
auto data = ith->second;
for (std::size_t i = 0; i < data.size(); ++i)
myFile << type_name << i << ",";
ith++;
}
myFile << "Style" << ",";
myFile << "FileName" << "\n";
auto it = dataset.begin();
while (it != dataset.end()) {
auto style = it->first.parent_path().filename();
auto filename = it->first;
for (auto const &entry: it->second) {
//auto ftype = entry.first;
for (auto elem: entry.second) {
myFile << elem << ", ";
}
}
myFile << style << ",";
myFile << filename << "\n";
it++;
}
myFile.close();
}
std::map<FTYPE, DataVector> compute_features_for(std::filesystem::path &file_path) {
auto data = readAuFile(file_path.string());
auto features = stft(data);
return features;
}
void compute_set_of_features(std::vector<std::filesystem::path> &files) {
std::vector<std::pair<std::filesystem::path, std::map<FTYPE, DataVector>>> all_features;
for (auto file: files) {
std::cout << "Reading --> " << file.filename() << std::endl;
auto data = readAuFile(file.string());
auto features = stft(data);
all_features.push_back(std::make_pair(file, features));
//std::cout << "Training parameters size --> " << features[FTYPE::BINAVG].size() << "x" << features[FTYPE::BINSTDEV].size() << std::endl;
}
std::cout << "Ready to write file --> " << "features.csv" << std::endl;
std::cout << "Training features size --> " << all_features.size() << std::endl;
write_csv("features.csv", all_features);
std::cout << "File written !" << std::endl;
}
#ifndef EML_FEATURES_EXTRACTION_H
#define EML_FEATURES_EXTRACTION_H
#include <map>
#include <filesystem>
#include "../Helpers/etypes.h"
/// Builds the feature map (FTYPE::BINAVG and FTYPE::BINSTDEV entries) for one signal.
std::map<FTYPE, DataVector> stft(DataVector &signal);
/// Writes the features of every (file path, features) pair in `dataset` to the CSV file `filename`,
/// followed by the style (parent directory name) and the file path of each sample.
void write_csv(std::string filename,
std::vector<std::pair<std::filesystem::path, std::map<FTYPE, DataVector>>> &dataset);
/// Reads the AU file at `file_path` and returns the features computed by stft().
std::map<FTYPE, DataVector> compute_features_for(std::filesystem::path &file_path);
/// Computes the features of every file in `files` and writes them to "features.csv".
void compute_set_of_features(std::vector<std::filesystem::path> &files);
#endif //EML_FEATURES_EXTRACTION_H
#include <iostream>
#include <chrono>
#include <tuple>
#include "../Helpers/file_helpers.h"
#include "features_extraction.h"
/**
 * Entry point of the feature-extraction tool.
 *
 * Lists the style directories under "../../DATASETS/Music", splits the files of
 * each one into training and testing sets (test ratio 0.0, so every file goes
 * to training), extracts the features of the training files and finally prints
 * the elapsed time in seconds.
 */
int main() {
    using Clock = std::chrono::high_resolution_clock;
    const auto started_at = Clock::now();

    std::vector<std::filesystem::path> training_files;
    std::vector<std::filesystem::path> testing_files;

    // Select random files of each music style
    for (auto style_dir: alpha_dir_listing("../../DATASETS/Music")) {
        auto split = select_train_test_files(alpha_files_listing(style_dir), 0.0);
        training_files.insert(training_files.end(), split.first.begin(), split.first.end());
        testing_files.insert(testing_files.end(), split.second.begin(), split.second.end());
    }

    std::cout << "# training --> " << training_files.size() << std::endl;
    std::cout << "# testing --> " << testing_files.size() << std::endl;

    compute_set_of_features(training_files);

    const auto finished_at = Clock::now();
    const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(finished_at - started_at);
    std::cout << elapsed.count() << " s" << std::endl;
    return 0;
}
#ifndef AU_READING_H
#define AU_READING_H
//32 bit word (unsigned) field Description/Content Hexadecimal numbers in C notation
//0 magic number the value 0x2e736e64 (four ASCII characters ".snd")
//1 data offset the offset to the data in bytes, must be divisible by 8. The minimum valid number is 24 (decimal), since this is the header length (six 32-bit words) with no space reserved for extra information (the annotation field). The minimum valid number with an annotation field present is 32 (decimal).
//2 data size data size in bytes. If unknown, the value 0xffffffff should be used.
//3 encoding Data encoding format:
//
//1 = 8-bit G.711 μ-law
//2 = 8-bit linear PCM
//3 = 16-bit linear PCM
//4 = 24-bit linear PCM
//5 = 32-bit linear PCM
//6 = 32-bit IEEE floating point
//7 = 64-bit IEEE floating point
//8 = Fragmented sample data
//9 = DSP program
//10 = 8-bit fixed point
//11 = 16-bit fixed point
//12 = 24-bit fixed point
//13 = 32-bit fixed point
//18 = 16-bit linear with emphasis
//19 = 16-bit linear compressed
//20 = 16-bit linear with emphasis and compression
//21 = Music kit DSP commands
//23 = 4-bit compressed using the ITU-T G.721 ADPCM voice data encoding scheme
//24 = ITU-T G.722 SB-ADPCM
//25 = ITU-T G.723 3-bit ADPCM
//26 = ITU-T G.723 5-bit ADPCM
//27 = 8-bit G.711 A-law
//
//4 sample rate the number of samples/second, e.g., 8000
//5 channels the number of interleaved channels, e.g., 1 for mono, 2 for stereo; more channels possible, but may not be supported by all readers.
#include <iostream>
#include <fstream>
#include <cstdio>
#include <cmath>
#include <string>
#include <iostream>
#include <filesystem>
#include <vector>
#include "globals.h"
namespace fs = std::filesystem;
// The six 32-bit words that open an AU file, in file order
// (see the format description at the top of this header).
struct AU_header {
uint32_t magic_number; // word 0: 0x2e736e64, the ASCII characters ".snd"
uint32_t data_offset;  // word 1: offset of the audio data, in bytes
uint32_t data_size;    // word 2: size of the audio data in bytes, 0xffffffff if unknown
uint32_t encoding;     // word 3: data encoding format code
uint32_t sample_rate;  // word 4: number of samples per second
uint32_t channels;     // word 5: number of interleaved channels
};
/**
 * Prints a human-readable summary of an AU header.
 *
 * Declared inline because this header defines the function: without it,
 * including the header from two translation units is a multiple-definition
 * link error.
 *
 * @param s destination stream; it is left in decimal mode
 * @param h header to print; a null pointer prints "AU Header: (null)"
 * @return the stream `s`
 */
inline std::ostream &operator<<(std::ostream &s, const AU_header *h) {
    if (h == nullptr)
        return s << "AU Header: (null)";

    const double size_in_mib = static_cast<double>(h->data_size) / (1.0 * (1 << 20));
    return s << "AU Header:\n"
             << "\tMagic Number:\t 0x" << std::hex << h->magic_number << "\n"
             << std::dec
             << "\tData Offset:\t " << h->data_offset << "\n"
             << "\tData size:\t " << size_in_mib << " MiB\n"
             << "\tEncoding:\t " << h->encoding << "\n"
             << "\tSample Rate:\t " << h->sample_rate << " sample/s \n"
             << "\tChannels:\t " << h->channels;
}
/**
 * Reads one unsigned 32-bit word from a stream, byte by byte.
 *
 * Declared inline because this header defines the function.
 *
 * @param file      stream to read from; advanced by up to four bytes
 * @param bigEndian true when the first byte read is the most significant one,
 *                  false when it is the least significant one
 * @return the assembled word; bytes that could not be read count as zero
 */
inline uint32_t read_w(std::ifstream &file, bool bigEndian=true) {
    uint32_t word = 0;
    for (std::size_t i = 0; i < 4; i++) {
        char raw = 0;
        if (!file.read(&raw, 1))
            break; // short read: the remaining bytes stay zero
        // Widen to an unsigned 32-bit value before shifting so that a byte
        // >= 0x80 moved into the top position never goes through a signed int.
        const uint32_t byte = static_cast<unsigned char>(raw);
        const std::size_t shift = (bigEndian ? 3 - i : i) * 8;
        word |= byte << shift;
    }
    return word;
}
/**
 * Prints the six header words of an AU file to standard output.
 *
 * The file is opened once, in binary mode. If it cannot be opened, a message
 * is written to standard error and nothing else is printed.
 * Declared inline because this header defines the function.
 *
 * @param fileName path of the AU file to inspect
 */
inline void auFileDetails(const std::string fileName) {
    std::ifstream myFile(fileName, std::ios::binary);
    if (!myFile) {
        std::cerr << "auFileDetails: cannot open " << fileName << std::endl;
        return;
    }

    AU_header header{};
    header.magic_number = read_w(myFile);
    std::cout << "(0-3) Magic number: " << std::hex << header.magic_number << std::endl;
    header.data_offset = read_w(myFile);
    std::cout << "(4-7) Data Offset: " << std::dec << header.data_offset << std::endl;
    header.data_size = read_w(myFile);
    std::cout << "(8-11) Data size: "
              << header.data_size
              << " B, "
              << header.data_size / (1.0 * (1 << 20))
              << " MiB"
              << std::endl;
    header.encoding = read_w(myFile);
    std::cout << "(12-15) Encoding: " << header.encoding << std::endl;
    header.sample_rate = read_w(myFile);
    std::cout << "(16-19) Sample rate: " << header.sample_rate << std::endl;
    header.channels = read_w(myFile);
    std::cout << "(20-23) Channels: " << header.channels << std::endl;
}
/**
 * Loads the samples of an AU file.
 *
 * The header is parsed to find where the audio data starts and how large it
 * is. The data is then decoded as consecutive 16-bit samples, most significant
 * byte first; the header's encoding and channel fields are not taken into
 * account. Reading stops at the announced size or at the end of the file,
 * whichever comes first, so a truncated file or an "unknown" size
 * (0xffffffff) yields only the samples actually present.
 * Declared inline because this header defines the function.
 *
 * @param fileName path of the AU file to read
 * @return the decoded samples; empty if the file cannot be opened (a message
 *         is then written to standard error)
 */
inline DataVector readAuFile(const std::string fileName) {
    DataVector data;
    std::ifstream myFile(fileName, std::ios::binary);
    if (!myFile) {
        std::cerr << "readAuFile: cannot open " << fileName << std::endl;
        return data;
    }

    AU_header header{};
    header.magic_number = read_w(myFile);
    header.data_offset = read_w(myFile);
    header.data_size = read_w(myFile);
    header.encoding = read_w(myFile);
    header.sample_rate = read_w(myFile);
    header.channels = read_w(myFile);

    myFile.seekg(header.data_offset, std::ios_base::beg);

    const std::size_t sample_count = header.data_size / 2;
    for (std::size_t k = 0; k < sample_count; k++) {
        char raw[2];
        if (!myFile.read(raw, 2))
            break; // end of file (or read error): keep what was decoded so far
        const unsigned higher_bits = static_cast<unsigned char>(raw[0]);
        const unsigned lower_bits = static_cast<unsigned char>(raw[1]);
        // Reinterpret the 16-bit pattern as a signed sample.
        const auto sample = static_cast<signed short>((higher_bits << 8) + lower_bits);
        data.push_back(static_cast<real>(sample));
    }
    return data;
}
#endif //AU_READING_H
#ifndef ETYPES_H
#define ETYPES_H
#include <complex>
#include <vector>
// Floating-point type used throughout the project.
using real = double;
// Complex number built on `real`.
using Complex = std::complex<real>;
// Sequence of `real` values.
using DataVector = std::vector<real>;

// Identifies the kinds of feature vectors.
enum class FTYPE : char {
    BINAVG = 1,
    BINSTDEV = 2,
    SPECCENT = 3
};
#endif //ETYPES_H
#ifndef FILE_HELPERS_H
#define FILE_HELPERS_H
#include <algorithm>
#include <cmath>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "etypes.h"
/**
 * Lists the sub-directories of a directory, sorted by path.
 *
 * Only direct children are considered; entries that are not directories are
 * skipped. Declared inline because this header defines the function.
 *
 * @param dir_path directory to scan
 * @return the paths of the sub-directories in ascending order
 */
inline std::vector<std::filesystem::path> alpha_dir_listing(std::string dir_path) {
    // The set keeps the paths sorted while they are collected.
    std::set<std::filesystem::path> sorted_by_name_dirs;
    for (const auto &entry: std::filesystem::directory_iterator(dir_path)) {
        if (std::filesystem::is_directory(entry.path()))
            sorted_by_name_dirs.insert(entry.path());
    }
    return std::vector<std::filesystem::path>(sorted_by_name_dirs.cbegin(), sorted_by_name_dirs.cend());
}
/**
 * Lists the entries of a directory, sorted by path.
 *
 * Every direct child is returned, whatever its type (sub-directories are not
 * filtered out). Declared inline because this header defines the function.
 *
 * @param dir_path directory to scan
 * @return the paths of the entries in ascending order
 */
inline std::vector<std::filesystem::path> alpha_files_listing(std::string dir_path) {
    // The set keeps the paths sorted while they are collected.
    std::set<std::filesystem::path> sorted_by_name_files;
    for (const auto &entry: std::filesystem::directory_iterator(dir_path))
        sorted_by_name_files.insert(entry.path());
    return std::vector<std::filesystem::path>(sorted_by_name_files.cbegin(), sorted_by_name_files.cend());
}
/**
 * Randomly splits a list of files into a training set and a testing set.
 *
 * floor(files.size() * (1 - ratio)) files are drawn at random, without
 * replacement, for training; all the others go to testing. The generator is
 * seeded from std::random_device, so the split differs from run to run.
 * Declared inline because this header defines the function.
 *
 * @param files candidate files; may be empty
 * @param ratio share of the files kept for testing; values outside [0, 1]
 *              are clamped and NaN is treated as 0
 * @return (training files sorted by path, testing files in their input order)
 */
inline std::pair<std::vector<std::filesystem::path>, std::vector<std::filesystem::path>>
select_train_test_files(std::vector<std::filesystem::path> files, double ratio) {
    // Clamp the ratio; NaN fails the first comparison and becomes 0.
    if (!(ratio >= 0.0))
        ratio = 0.0;
    else if (ratio > 1.0)
        ratio = 1.0;

    const std::size_t training_size = std::min(
            files.size(),
            static_cast<std::size_t>(std::floor(files.size() * (1.0 - ratio))));

    // Shuffle the indexes and keep the first `training_size` of them: a uniform
    // draw without replacement that needs no retry loop and accepts an empty list.
    std::vector<std::size_t> order(files.size());
    std::iota(order.begin(), order.end(), std::size_t{0});
    std::random_device random_device;
    std::mt19937 engine{random_device()};
    std::shuffle(order.begin(), order.end(), engine);

    std::vector<char> is_training(files.size(), 0);
    std::set<std::filesystem::path> training_files_set; // keeps the training files sorted
    for (std::size_t k = 0; k < training_size; k++) {
        is_training[order[k]] = 1;
        training_files_set.insert(files[order[k]]);
    }

    std::vector<std::filesystem::path> testing_files;
    for (std::size_t k = 0; k < files.size(); k++) {
        if (!is_training[k])
            testing_files.push_back(files[k]);
    }

    std::vector<std::filesystem::path> training_files(training_files_set.cbegin(), training_files_set.cend());
    return std::make_pair(std::move(training_files), std::move(testing_files));
}
#endif //FILE_HELPERS_H
#ifndef GLOBAL_H
#define GLOBAL_H
#include "etypes.h"
// Window size.
constexpr std::size_t N{512};
// Sampling frequency.
constexpr real Fs{22050.0};
// FFT size: half of the window size.
constexpr std::size_t FFT_SIZE{N / 2};
// Number of classes.
constexpr std::size_t CLASS_N{10};
// Number of features (if AVG and STD).
constexpr std::size_t FEAT_N{512};
// Number of One Versus One classes.
constexpr std::size_t OVO_CLASS_N{45};
#endif //GLOBAL_H
#include <string>
#include <stdexcept>
#include "music_style_helpers.h"
std::string music_style_to_string(const MUSIC_STYLE &s) {
switch (s) {
case MUSIC_STYLE::BLUES:
return "blues";
case MUSIC_STYLE::CLASSICAL:
return "classical";
case MUSIC_STYLE::COUNTRY:
return "country";
case MUSIC_STYLE::DISCO:
return "disco";
case MUSIC_STYLE::HIPHOP:
return "hiphop";
case MUSIC_STYLE::JAZZ:
return "jazz";
case MUSIC_STYLE::METAL:
return "metal";
case MUSIC_STYLE::POP:
return "pop";
case MUSIC_STYLE::REGGAE:
return "reggae";
case MUSIC_STYLE::ROCK:
return "rock";
default:
throw std::logic_error("to_string: MUSIC_STYLE enum values not found.");
}
}
MUSIC_STYLE music_style_from_string(std::string str) {
if (str == "blues") {
return MUSIC_STYLE::BLUES;
} else if (str == "classical") {
return MUSIC_STYLE::CLASSICAL;
} else if (str == "country") {
return MUSIC_STYLE::COUNTRY;
} else if (str == "disco") {
return MUSIC_STYLE::DISCO;
} else if (str == "hiphop") {
return MUSIC_STYLE::HIPHOP;
} else if (str == "jazz") {
return MUSIC_STYLE::JAZZ;
} else if (str == "metal") {
return MUSIC_STYLE::METAL;
} else if (str == "pop") {