Source code for syndirella.slipper._placement_data

#!venv/bin/env python3
"""
syndirella.slipper._placement_data.py

This module contains functions to get the placement data from a Fragmenstein run.
"""
import os
import json
import pandas as pd
import csv
import glob2


[docs]
def get_placement_data(products: pd.DataFrame, fragmenstein_output: str, library_output_dir: str) -> pd.DataFrame:
    """
    Merge Fragmenstein placement results onto the products table and save the result.

    The placement data is first collected into fragmenstein_placements.csv (inside
    ``library_output_dir``), then left-merged onto ``products`` by the 'name' column.
    The merged table is written next to the products csv found in
    ``library_output_dir``, with '_placements.csv' replacing the '.csv' extension.

    Args:
        products: Products table. It must contain a 'name' column (the merge key); the
            merged table is sorted by 'num_atom_diff', which must therefore be present.
        fragmenstein_output: Path to the output directory of Fragmenstein.
        library_output_dir: Directory where the products csv is located.

    Returns:
        The merged table, sorted by 'num_atom_diff'.

    Raises:
        ValueError: If no placement data could be collected from ``fragmenstein_output``.
    """
    # Make fragmenstein_placements.csv
    placements_path = make_fragmenstein_placements_csv(fragmenstein_output, library_output_dir)
    if placements_path is None:
        # No csv was written; fail here with a readable message instead of letting
        # pd.read_csv(None) raise an opaque ValueError.
        raise ValueError(f'No Fragmenstein placement data could be collected from {fragmenstein_output}')
    placements: pd.DataFrame = pd.read_csv(placements_path)
    # Left merge keeps every product, including those without a placement
    merged_df = pd.merge(products, placements, on='name', how='left')
    # Drop leftover index columns from earlier csv round-trips, when present
    merged_df = merged_df.drop(columns=['index', 'Unnamed: 0'], errors='ignore')
    # Order merged_df by the 'num_atom_diff' column
    merged_df = merged_df.sort_values(by=['num_atom_diff'])
    final_products_csv_path = find_products_csv(library_output_dir)
    # splitext only strips the trailing extension, so a '.csv' elsewhere in the path
    # (e.g. in a directory name) cannot truncate the output path.
    merged_output_path = os.path.splitext(final_products_csv_path)[0] + '_placements.csv'
    merged_df.to_csv(merged_output_path, index=False)
    return merged_df



[docs]
def find_products_csv(library_output_dir: str) -> str:
    """
    Given a directory, find the products csv file that is not a '_placements' csv.

    Args:
        library_output_dir: Directory to search (not recursive).

    Returns:
        Path of the first file (in sorted order) whose name contains 'products', ends
        with '.csv' and does not contain '_placements'.

    Raises:
        IndexError: If no such file exists in the directory.
    """
    # Pattern for matching files that contain 'products' in their names
    pattern = os.path.join(library_output_dir, '*products*.csv')
    matched_files = glob2.glob(pattern)
    # Filter on the file name only: a parent directory that happens to contain
    # '_placements' must not exclude every candidate. Sorting makes the choice
    # deterministic when several files match.
    filtered_files = sorted(
        path for path in matched_files
        if '_placements' not in os.path.basename(path)
    )
    if not filtered_files:
        raise IndexError(f'No products csv found in {library_output_dir}')
    return filtered_files[0]



[docs]
def make_fragmenstein_placements_csv(output_path: str, library_output_dir: str) -> str:
    """
    This function makes a fragmenstein_placements.csv by looking through outputs of Fragmenstein.

    Every non-hidden subdirectory of ``output_path`` is scanned for '.json' files; each
    file that can be parsed and converted contributes one row, named after the
    subdirectory. Files that cannot be parsed or converted are reported and skipped.

    Args:
        output_path: str: The path to the output directory of Fragmenstein.
        library_output_dir: str: The path to the directory where the final products csv is located.

    Returns:
        The path of the written fragmenstein_placements.csv, or None when no row could
        be collected (in which case no file is written).
    """
    headers = ['name', 'ΔΔG', 'ΔG_bound', 'ΔG_unbound', 'comRMSD']
    collected_data = []
    # Sorted listings keep the row order reproducible across filesystems
    for subdir in sorted(os.listdir(output_path)):
        if subdir.startswith('.'):  # Skip hidden files/directories
            continue
        subdir_path = os.path.join(output_path, subdir)
        if not os.path.isdir(subdir_path):
            continue
        for file_name in sorted(os.listdir(subdir_path)):
            if not file_name.endswith('.json'):
                continue
            json_file_path = os.path.join(subdir_path, file_name)
            try:
                # JSON is UTF-8 by specification; do not depend on the locale encoding
                with open(json_file_path, 'r', encoding='utf-8') as json_file:
                    data = json.load(json_file)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # One corrupt file must not abort the whole collection
                print(f'Error: could not parse {json_file_path}: {e}')
                continue
            try:
                csv_row = make_success_csv_row(subdir, data)
                collected_data.append(csv_row)
            except Exception as e:
                print(f'Error: {e}')
    if not collected_data:
        return None
    csv_file_path = os.path.join(library_output_dir, 'fragmenstein_placements.csv')
    # The headers contain non-ASCII characters, so the encoding is pinned to UTF-8
    # (also the default encoding pandas uses when the file is read back).
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(headers)
        writer.writerows(collected_data)
    return csv_file_path



[docs]
def get_delta_delta_G(data: dict) -> float:
    """
    Get the delta delta G value from the parsed JSON data. Accounts for different formats.

    The precomputed "xyz_∆∆G" entry under "Energy" is used when present; otherwise the
    value is derived as bound minus unbound 'total_score'. When neither layout is
    available, infinity is returned.
    """
    try:
        precomputed = data["Energy"]["xyz_∆∆G"]
    except KeyError:
        pass
    else:
        return precomputed

    # Fallback layout: separate bound / unbound score tables
    try:
        bound_score = data["Energy"]["bound"]['total_score']
        unbound_score = data["Energy"]["unbound"]['total_score']
        return bound_score - unbound_score
    except KeyError:
        return float('inf')



[docs]
def get_bound_unbound(data: dict) -> tuple:
    """
    Get the bound and unbound energy values from the parsed JSON data.

    Two layouts are accepted under "Energy": flat "xyz_bound" / "xyz_unbound" entries,
    or nested "bound" / "unbound" tables carrying a "total_score". Returns a
    (bound, unbound) tuple, or (inf, inf) when neither layout is complete.
    """
    try:
        return data["Energy"]["xyz_bound"], data["Energy"]["xyz_unbound"]
    except KeyError:
        pass

    # Fallback layout: nested score tables
    try:
        nested_bound = data["Energy"]["bound"]["total_score"]
        nested_unbound = data["Energy"]["unbound"]["total_score"]
    except KeyError:
        return float('inf'), float('inf')
    return nested_bound, nested_unbound



[docs]
def make_success_csv_row(subdir: str, data: dict) -> list:
    """
    Make a row for the fragmenstein_placements.csv file.

    Args:
        subdir: Name of the Fragmenstein output subdirectory; used as the row's name.
        data: Parsed JSON data of one placement.

    Returns:
        A list ordered like the csv header: name, ΔΔG, ΔG_bound, ΔG_unbound, comRMSD.
        Values missing from ``data`` are reported as infinity.
    """
    ddG = get_delta_delta_G(data)
    bound, unbound = get_bound_unbound(data)
    rmsd = data.get("mRMSD", float('inf'))
    # bound must precede unbound to line up with the 'ΔG_bound', 'ΔG_unbound' header
    # columns; the previous order put each value under the other's column.
    return [subdir, ddG, bound, unbound, rmsd]