Source code for pyprecag.table_ops

import logging

import pandas as pd
from geopandas import GeoDataFrame
from scipy.stats import stats
from scipy.stats import ttest_rel

from . import config, TEMPDIR

# Module-level logger named after this module. Only a NullHandler is attached
# here, so this module itself does not configure any log output; the
# application using the library decides where (and whether) records go.
LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())


def t_test(arr):
    """Moving window paired t-test function.

    Intended to be used with ``Series.rolling(...).apply(..., raw=False)``.
    A rolling ``apply`` only hands over a single column, so to get two columns
    into the function one of them must be set as the index.

    Example:
        input_table = input_table.set_index(treatment_column, drop=False)
        input_table['p_value'] = input_table['controls_mean'].rolling(
            window=size, center=True).apply(t_test, raw=False)

    Args:
        arr (pandas.Series): One window of values. The index holds the first
            sample and the values hold the second (paired) sample.

    Returns:
        float: The two-sided p-value of the paired t-test between the index
        and the values of ``arr``.
    """
    # Use the public scipy.stats.ttest_rel rather than going through the
    # deprecated private ``scipy.stats.stats`` namespace, and hand scipy plain
    # arrays instead of pandas objects.
    _, p_value = ttest_rel(arr.index.values, arr.values)
    return p_value
def response_index(arr):
    """Moving window response index function.

    Intended to be used with ``Series.rolling(...).apply(..., raw=False)``.
    A rolling ``apply`` only hands over a single column, so to get two columns
    into the function one of them must be set as the index.

    Example:
        input_table = input_table.set_index(treatment_column, drop=False)
        input_table['RI'] = input_table['controls_mean'].rolling(
            window=size, center=True).apply(response_index, raw=False)

    Args:
        arr (pandas.Series): One window of values; the index carries the
            second column.

    Returns:
        The mean of the index divided by the mean of the values of ``arr``.
    """
    index_mean = arr.index.to_series().mean()
    values_mean = arr.mean()
    return index_mean / values_mean
try:
    _STRING_TYPES = basestring  # noqa: F821 - Python 2: covers str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3: basestring no longer exists


def calculate_strip_stats(input_table, treatment_column, control_columns=None, size=5):
    """Calculate statistics for a strip.

    A moving window is used for some of the statistics. This window is centred,
    so for a window size of 5, 2 NaN or blanks will be added to the start and
    end of the output column.

    Statistics include (output column names):
        <controls>_mean - row by row mean of the control columns. Only created
                       when more than one control column is supplied; with a
                       single control column that column is used directly.
        treat_diff - row by row difference between the treatment and control
                       mean columns
        av_treat_dif - mean of the treat_diff column using a moving window
        p_value - p-value using a moving window over the treatment and control
                       mean columns
        RI - Response Index using a moving window over the treatment and
                       control mean columns

    Args:
        input_table (pandas.core.frame.DataFrame): the table to calculate
            statistics for. A GeoDataFrame is flattened by dropping its
            'geometry' column.
        treatment_column (str): The column containing the treatment values
        control_columns (List[str]): The column(s) containing the control
            values. This can be one or two columns.
        size (int): The size of the moving window.

    Returns:
        pandas.core.frame.DataFrame: A copy of the input table containing the
            new statistics columns. It is indexed by 'TrialPtID' when that
            column exists, otherwise it keeps the index of the input table.
        control_mean (str): The column used as the control mean.

    Raises:
        TypeError: If control_columns is not a list or treatment_column is not
            a string.
        ValueError: If treatment_column is empty, no control column is given,
            or any of the named columns is missing from input_table.
    """
    if isinstance(input_table, GeoDataFrame):
        # drop geometry etc. and create flat table.
        input_table = pd.DataFrame(input_table.drop(columns='geometry', inplace=False))

    if control_columns is None:
        control_columns = []

    if not isinstance(control_columns, list):
        raise TypeError("control_columns should be a list.")

    if not isinstance(treatment_column, _STRING_TYPES):
        raise TypeError("treatment_column should be a string.")

    if treatment_column == '':
        raise ValueError('Invalid treatment column')

    if not control_columns:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError('At least one control column is required')

    missing = [col for col in [treatment_column] + control_columns
               if col and col not in input_table.columns]
    if missing:
        raise ValueError('columns not found - {}'.format(','.join(str(col) for col in missing)))

    # Work on a copy so the caller's table is never modified.
    input_table = input_table.copy()
    original_index = input_table.index

    # Statistics -----------------------------------------------------------------
    if len(control_columns) > 1:
        # calculate the mean for the column(s)
        control_mean = '-'.join(control_columns)
        control_mean = control_mean.replace(' Strip Value', '')
        control_mean = control_mean.replace(' Strip Control', '')
        control_mean = '{}_mean'.format(control_mean.strip())
        input_table[control_mean] = input_table[control_columns].mean(axis=1)
    else:
        control_mean = control_columns[0]

    # calculate the difference
    input_table['treat_diff'] = input_table[treatment_column] - input_table[control_mean]

    # Moving Mean for values diff
    input_table['av_treat_dif'] = input_table['treat_diff'].rolling(size, center=True).mean()

    # Rolling window using two-tailed paired student t-test
    #   https://pythonfordatascience.org/paired-samples-t-test-python/
    #   https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html
    #   https://stackoverflow.com/a/52029516
    # rolling().apply() only passes one column to the callback, so the
    # treatment values ride along as the index of each window.
    input_table = input_table.set_index(treatment_column, drop=False)

    input_table['p_value'] = input_table[control_mean].rolling(
        window=size, center=True).apply(t_test, raw=False)

    input_table['RI'] = input_table[control_mean].rolling(
        window=size, center=True).apply(response_index, raw=False)

    # reset index
    if 'TrialPtID' in input_table.columns:
        input_table.set_index('TrialPtID', drop=False, inplace=True)
    else:
        # No trial point id column available: give back the caller's index
        # rather than failing with a KeyError or leaving the treatment values
        # as the index.
        input_table.index = original_index

    return input_table, control_mean