import pandas as pd
import math
import statsmodels.tsa.stattools as tsa
from statsmodels.tsa.api import VAR
from statsmodels.stats.stattools import durbin_watson
import os
from statistics import mean, median
import pyinform

_MIN_OBSERVATIONS = 34  # a series shorter than this is skipped


def _load_close_prices(source):
    """Return ``{label: Close series}`` for every ``.csv`` file in *source*.

    Files are visited in sorted order so that the node ids derived from the
    dictionary order are reproducible between runs (``os.listdir`` makes no
    ordering promise). Each file is read exactly once.
    """
    closes = {}
    for filename in sorted(os.listdir(source)):
        if filename.endswith(".csv"):
            label = os.path.splitext(filename)[0]
            closes[label] = pd.read_csv(os.path.join(source, filename)).Close
    return closes


def _first_equation_r_squared(residuals, diffs):
    """Return ``1 - RSS/SST`` for the first column of a fitted two-series VAR.

    RSS is taken from the first column of *residuals*; SST is computed over
    every row of the first column of *diffs*.
    """
    rss = (residuals.iloc[:, 0] ** 2).sum()
    target = diffs.iloc[:, 0]
    sst = ((target - target.mean()) ** 2).sum()
    return 1 - (rss / sst)


def GTest(source, export, pvalue=0.05):
    """Run pairwise lag-1 Granger tests over a directory of price CSVs.

    Every ``<label>.csv`` in *source* must provide a ``Close`` column. For
    each ordered pair ``(x, y)`` of distinct labels whose series both have at
    least 34 rows, the first differences of the two Close columns are passed
    to ``grangercausalitytests`` with ``x`` as the first column and ``y`` as
    the second (statsmodels tests whether the second column Granger-causes
    the first). When both the SSR F-test and the SSR chi-square test have a
    p-value ``<= pvalue``:

    * a directed edge ``y -> x`` is recorded;
    * a VAR is fitted on the differenced pair, and the R-squared of the
      ``x`` equation is compared with the Durbin-Watson statistic of its
      residuals. If R-squared exceeds it, the pair is counted as spurious
      (this mirrors the Granger-Newbold rule of thumb); otherwise a one-step
      forecast of ``x`` (last Close plus forecast difference) is collected.

    The mean of the collected forecasts per ``x`` is written out.

    Files written (``export`` is used as a plain string prefix, so pass a
    directory path with its trailing separator):

    * ``Node_List.csv`` - one ``Label`` per input file, row index = node id.
    * ``Edges.csv``     - ``Source``, ``Target``, ``Type`` columns (node ids).
    * ``Forecast.csv``  - one row per forecast label, mean forecast in
      column ``0``.

    Args:
        source: Directory holding the input CSV files.
        export: Path prefix for the output files.
        pvalue: Significance threshold applied to both Granger tests.

    Returns:
        None. Results are written to disk and diagnostics are printed.
    """
    closes = _load_close_prices(source)
    labels = list(closes)
    node_ids = {label: position for position, label in enumerate(labels)}
    pd.DataFrame({'Label': labels}).to_csv(export + 'Node_List.csv')

    edge_sources = []
    edge_targets = []
    forecasts = {}
    f_test_count = 0
    chi2_count = 0
    var_count = 0
    spurious_count = 0

    for x in labels:
        close_x = closes[x]
        if len(close_x) < _MIN_OBSERVATIONS:
            print(len(close_x))
            print('Sample One Is Too Small!')
            continue

        x_future_values = []
        for y in labels:
            close_y = closes[y]
            if len(close_y) < _MIN_OBSERVATIONS:
                print('Sample Two is too small')
                continue
            if x == y:
                continue

            # Work on first differences; rows where either series has no
            # value (the leading NaN, or a length mismatch) are dropped.
            diffs = pd.DataFrame(
                {x: close_x.diff(), y: close_y.diff()}
            ).dropna()

            tests = tsa.grangercausalitytests(diffs, 1, verbose=False)[1][0]
            chi2_significant = tests['ssr_chi2test'][1] <= pvalue
            f_significant = tests['ssr_ftest'][1] <= pvalue
            chi2_count += int(chi2_significant)
            f_test_count += int(f_significant)
            if not (chi2_significant and f_significant):
                continue

            var_count += 1
            edge_sources.append(node_ids[y])
            edge_targets.append(node_ids[x])

            results = VAR(diffs).fit()
            residuals = results.resid
            dw = durbin_watson(resids=residuals)
            print('------------------------------------')
            print(dw)
            r_squared = _first_equation_r_squared(residuals, diffs)
            print(r_squared)
            print('------------------------------------')

            if r_squared <= dw[0]:
                lag_order = results.k_ar
                pred = results.forecast(diffs.values[-lag_order:], 1)
                # The model forecasts a difference; add it to the last
                # observed Close of x to get a price level.
                future_value = pred[0][0] + close_x.iloc[-1]
                print(x + ' as caused by ' + y)
                print('Not Spurious')
                print(future_value)
                x_future_values.append(future_value)
            else:
                print("Is Spurious")
                spurious_count += 1

        print(x_future_values)
        if x_future_values:
            forecasts[x] = mean(x_future_values)
            print(forecasts[x])

    print(f_test_count)
    print(chi2_count)
    print(var_count)
    print(spurious_count)

    # Build the table from a mapping: assigning a scalar to a column of an
    # empty DataFrame creates the column but stores no value.
    forecast_table = pd.Series(forecasts, dtype=float).to_frame()
    print(forecast_table)
    forecast_table.to_csv(export + 'Forecast.csv')

    edge_table = pd.DataFrame({
        'Source': edge_sources,
        'Target': edge_targets,
        'Type': ['Directed'] * len(edge_sources),
    })
    edge_table.to_csv(export + 'Edges.csv')
    return

# Batch Granger run over the whole data directory; currently disabled.
#GTest('/home/oblanco214/PyServer/lotusfiles/Future/Data/','/var/www/html/Gephi/')
# NOTE(review): printed unconditionally when the module is loaded, before the
# transfer-entropy computation below has run.
print("Done!!")


# Load the Close column of two hard-coded CSV files as NumPy arrays.
Data1 = pd.read_csv('/home/oblanco214/PyServer/lotusfiles/Future/Data/UDR.csv').Close.values
Data2 = pd.read_csv('/home/oblanco214/PyServer/lotusfiles/Future/Data/SBUX.csv').Close.values

# Transfer entropy between the two series with k=2. Presumably Data1 is the
# source and Data2 the target, per pyinform's documented
# (source, target, k) argument order - TODO confirm.
# NOTE(review): pyinform estimators are documented for discrete (integer-coded)
# series; confirm how raw Close prices are handled or whether they should be
# binned first.
TERes = pyinform.transferentropy.transfer_entropy(Data1,Data2,k=2)

# Print the computed transfer-entropy value.
print(TERes)



