Source code for iMaT.src.tokenization.refine_results.calculate_pitch_intervals

"""
Module: tokenization.refine_results.calculate_pitch_intervals.py
================================================================

This module, part of the `tokenization.refine_results` package, refines tokenized MIDI data by calculating pitch intervals.

Functions
---------
- `tokenization_calculate_pitch_intervals`: Handles a workflow for refining tokenized MIDI data by calculating pitch intervals.

- `calculate_pitch_intervals_function`: Helper function used within `tokenization_calculate_pitch_intervals` to add a pitch interval column to a DataFrame.

Notes
-----
The module expects CSV files to have a specific structure, including a 'filename' column, and pitches should be represented as MIDI pitch values.
Please refer to the individual function docstrings for more detailed descriptions and examples of usage.
"""
import numpy as np
import pandas as pd
from tqdm import tqdm

from iMaT.src.cli.menu_constructors import display_menu_print_results, display_menu_print_textblock, \
    display_menu_request_selection, util_convert_pd_dataframe_to_imat_datacont
from iMaT.src.tokenization.utils import save_data_to_new_csv_file, select_csv_file_2d_token_representation
from iMaT.src.utils.error_handling import handle_error


[docs]def tokenization_calculate_pitch_intervals(): """ Executes a workflow for refining CSV data by calculating pitch intervals. This function guides the user to select a CSV file, performs data refining operations to calculate pitch differences between the current row and the next row (grouped by filename if available), and displays a table with the results. The user then has an option to save the refined data into a new CSV file. Parameters: None Returns: None See Also -------- select_csv_file_2d_token_representation : Opens a file dialog allowing the user to select a CSV file. calculate_pitch_intervals_function : Refines a pandas DataFrame by calculating pitch differences between the current row and the next row, grouping by filename if available. """ try: while True: file_name = select_csv_file_2d_token_representation() if file_name is None: break df = pd.read_csv(file_name) df = calculate_pitch_intervals_function(df) # Step 4: show the user the first 30 rows after executing step 2 and 3 results_dict = util_convert_pd_dataframe_to_imat_datacont(df.head(30)) display_menu_print_results(results_dict) # Step 5: ask the user whether he wants to save the new file yes_no_menu = { "menu_displayed_text": [ "Save Refined Data", "Do you want to save the refined data to a new CSV file?", "Please select your choice (1-2): ", ["Choice", "Description"], ], "menu_entries": [ ["CONT: Save the new file", "Yes", "Yes, save the refined data to a new CSV file"], ["DONT: Do not save the new file", "No", "No, do not save the refined data"], ] } save_input = display_menu_request_selection(yes_no_menu) if save_input.lower() == 'yes': new_file_path = save_data_to_new_csv_file(df, file_name, "add_pitch_interval_") textblock_dict_newfile = { "menu_displayed_text": [ "-- New File Path --", "Please read the following message:", "<To continue, please press Enter>", ["", "Message"], ], "menu_entries_text": [ ["New File Path", f"The refined data has been saved to a new CSV file: {new_file_path}"] ] } display_menu_print_textblock(textblock_dict_newfile) break except Exception as e: handle_error(e)
[docs]def calculate_pitch_intervals_function(df): """ Refines a pandas DataFrame by calculating pitch differences between the current row and the next row. This function first checks if the 'Pitch' column exists in the DataFrame. If so, it calculates the pitch differences between the current row and the next row. The operation is performed for each unique filename if a 'filename' column exists in the DataFrame. If the 'Pitch' column contained non-numeric entries (i.e., had a prefix), it adds the prefix to the calculated difference values. Parameters ---------- df : pandas.DataFrame The DataFrame to refine. Returns ------- pandas.DataFrame The DataFrame with added 'PitchDifferenceToNextPitch' column. See Also -------- pandas.DataFrame.diff : Calculates the difference of a DataFrame element compared with another element in the DataFrame (default is the element in the same column of the previous row). pandas.DataFrame.shift : Shifts index by desired number of periods with an optional time freq. """ try: if 'Pitch' not in df.columns: return df prefix = "Pitch_" df["PurePitch"] = df["Pitch"].apply(lambda x: float(x[len(prefix):]) if x.startswith(prefix) else float(x)) # Check if "Pitch_" prefix is present has_prefix = df["Pitch"].str.startswith(prefix).any() # Initialize a new column for pitch differences with NaNs df['PitchDifferenceToNextPitch'] = np.nan # Calculate pitch differences, grouped by 'filename' if it exists if 'filename' in df.columns: filenames = df['filename'].unique() for filename in tqdm(filenames, desc='Calculating pitch differences'): filename_group = df[df['filename'] == filename].sort_index() notes_only = filename_group[~filename_group['PurePitch'].isna()] differences = notes_only['PurePitch'].diff().shift(-1) df.loc[differences.index, 'PitchDifferenceToNextPitch'] = differences else: notes_only = df[~df['PurePitch'].isna()] differences = notes_only['PurePitch'].diff().shift(-1) df.loc[differences.index, 'PitchDifferenceToNextPitch'] = differences # If 'Pitch' column had non-numeric entries (i.e., had a prefix), add prefix to the calculated difference values if has_prefix: df['PitchDifferenceToNextPitch'] = 'PitchDifferenceToNextPitch_' + df['PitchDifferenceToNextPitch'].astype(str) # Cleanup - remove PurePitch column df = df.drop(columns="PurePitch") return df except Exception as e: handle_error(e)