import yfinance as yf
import pandas as pd
import datetime as dt
import os
import math
import alpaca_trade_api as tradeapi  # PyPI package is alpaca-trade-api
import statsmodels.tsa.stattools as tsa
from statsmodels.tsa.api import VAR
from statsmodels.stats.stattools import durbin_watson
from statsmodels.tsa.stattools import adfuller
from statistics import mean, median
import matplotlib.pyplot as plt
from iexfinance.stocks import get_historical_data  # used by Analysis.IEXPull
import multiprocessing as mp

class Constants:
    tickers = ["TSLA","MMM","ABT","ABBV","ABMD","ACN","ATVI","ADBE","AMD","AAP","AES","AFL","A","APD","AKAM","ALK","ALB","ARE","ALXN","ALGN","ALLE","LNT","ALL","GOOGL","GOOG","MO","AMZN","AMCR","AEE","AAL","AEP","AXP","AIG","AMT","AWK","AMP","ABC","AME","AMGN","APH","ADI","ANSS","ANTM","AON","AOS","APA","AIV","AAPL","AMAT","APTV","ADM","ANET","AJG","AIZ","T","ATO","ADSK","ADP","AZO","AVB","AVY","BKR","BLL","BAC","BK","BAX","BDX","BBY","BIO","BIIB","BLK","BA","BKNG","BWA","BXP","BSX","BMY","AVGO","BR","CHRW","COG","CDNS","CPB","COF","CAH","KMX","CCL","CARR","CAT","CBOE","CBRE","CDW","CE","CNC","CNP","CTL","CERN","CF","SCHW","CHTR","CVX","CMG","CB","CHD","CI","CINF","CTAS","CSCO","C","CFG","CTXS","CLX","CME","CMS","KO","CTSH","CL","CMCSA","CMA","CAG","CXO","COP","ED","STZ","COO","CPRT","GLW","CTVA","COST","COTY","CCI","CSX","CMI","CVS","DHI","DHR","DRI","DVA","DE","DAL","XRAY","DVN","DXCM","FANG","DLR","DFS","DISCA","DISCK","DISH","DG","DLTR","D","DPZ","DOV","DOW","DTE","DUK","DRE","DD","DXC","ETFC","EMN","ETN","EBAY","ECL","EIX","EW","EA","EMR","ETR","EOG","EFX","EQIX","EQR","ESS","EL","EVRG","ES","RE","EXC","EXPE","EXPD","EXR","XOM","FFIV","FB","FAST","FRT","FDX","FIS","FITB","FE","FRC","FISV","FLT","FLIR","FLS","FMC","F","FTNT","FTV","FBHS","FOXA","FOX","BEN","FCX","GPS","GRMN","IT","GD","GE","GIS","GM","GPC","GILD","GL","GPN","GS","GWW","HRB","HAL","HBI","HIG","HAS","HCA","PEAK","HSIC","HSY","HES","HPE","HLT","HFC","HOLX","HD","HON","HRL","HST","HPQ","HUM","HBAN","HII","IEX","IDXX","INFO","ITW","ILMN","INCY","IR","INTC","ICE","IBM","IP","IPG","IFF","INTU","ISRG","IVZ","IPGP","IQV","IRM","JKHY","J","JBHT","SJM","JNJ","JCI","JPM","JNPR","KSU","K","KEY","KEYS","KMB","KIM","KMI","KLAC","KSS","KHC","KR","LB","LHX","LH","LRCX","LW","LVS","LEG","LDOS","LEN","LLY","LNC","LIN","LYV","LKQ","LMT","L","LOW","LYB","MTB","MRO","MPC","MKTX","MAR","MMC","MLM","MAS","MA","MKC","MXIM","MCD","MCK","MDT","MRK","MET","MTD","MGM","MCHP","MU","MSFT","MAA","MHK","TAP","MDLZ","MNST","MCO","MS","MOS","MSI","MSCI","MYL","NDAQ","NOV","NTAP","NFLX","NWL","NEM","NWSA","NWS","NEE","NLSN","NKE","NI","NBL","NSC","NTRS","NOC","NLOK","NCLH","NRG","NUE","NVDA","NVR","ORLY","OXY","ODFL","OMC","OKE","ORCL","OTIS","PCAR","PKG","PH","PAYX","PAYC","PYPL","PNR","PBCT","PEP","PKI","PRGO","PFE","PM","PSX","PNW","PXD","PNC","PPG","PPL","PFG","PG","PGR","PLD","PRU","PEG","PSA","PHM","PVH","QRVO","PWR","QCOM","DGX","RL","RJF","RTX","O","REG","REGN","RF","RSG","RMD","RHI","ROK","ROL","ROP","ROST","RCL","SPGI","CRM","SBAC","SLB","STX","SEE","SRE","NOW","SHW","SPG","SWKS","SLG","SNA","SO","LUV","SWK","SBUX","STT","STE","SYK","SIVB","SYF","SNPS","SYY","TMUS","TROW","TTWO","TPR","TGT","TEL","FTI","TDY","TFX","TXN","TXT","TMO","TIF","TJX","TSCO","TT","TDG","TRV","TFC","TWTR","TYL","TSN","UDR","ULTA","USB","UAA","UA","UNP","UAL","UNH","UPS","URI","UHS","UNM","VFC","VLO","VAR","VTR","VRSN","VRSK","VZ","VRTX","VIAC","V","VNO","VMC","WRB","WAB","WMT","WBA","DIS","WM","WAT","WEC","WFC","WELL","WST","WDC","WU","WRK","WY","WHR","WMB","WLTW","WYNN","XEL","XRX","XLNX","XYL","YUM","ZBRA","ZBH","ZION","ZTS"]
    test = ["TSLA","MMM","ABT","ABBV","ABMD","ACN","ATVI","ADBE","AMD","AAP","AES","AFL","A","APD","AKAM","ALK","ALB","ARE","ALXN","ALGN","ALLE","LNT","ALL","GOOGL","GOOG","MO","AMZN","AMCR","AEE","AAL","AEP","AXP","AIG","AMT","AWK","AMP","ABC","AME","AMGN","APH","ADI","ANSS","ANTM","AON","AOS","APA","AIV","AAPL","AMAT","APTV","ADM","ANET","AJG","AIZ","T","ATO","ADSK","ADP"]
    test_new = ["SPY","TSLA","MMM","ABT","ABBV","ABMD","ACN","ATVI","ADBE","AMD","AAP","AES","AFL","A","APD","AKAM","ALK","ALB","ARE","ALXN","ALGN","ALLE","LNT","ALL","GOOGL","GOOG","MO","AMZN","AMCR","AEE","AAL","AEP","AXP","AIG","AMT","AWK","AMP","ABC","AME","AMGN","APH","ADI","ANSS","ANTM","AON","AOS","APA","AIV","AAPL","AMAT","APTV","ADM","ANET","AJG","AIZ","T","ATO","ADSK","ADP","AZO","AVB","AVY","BKR","BLL","BAC","BK","BAX","BDX","BBY","BIO","BIIB","BLK","BA","BKNG","BWA","BXP","BSX","BMY","AVGO","BR","CHRW","COG","CDNS","CPB","COF","CAH","KMX","CCL","CAT","CBOE","CBRE","CDW","CE","CNC","CNP","CTL","CERN","CF","SCHW","CHTR","CVX","CMG","CB","CHD","CI","CINF","CTAS","CSCO","C","CFG","CTXS","CLX","CME","CMS","KO","CTSH","CL","CMCSA","CMA","CAG","CXO","COP","ED","STZ","COO","CPRT","GLW","CTVA","COST","COTY","CCI","CSX","CMI","CVS","DHI","DHR","DRI","DVA","DE","DAL","XRAY","DVN","DXCM","FANG","DLR","DFS","DISCA","DISCK","DISH","DG","DLTR","D","DPZ","DOV","DOW","DTE","DUK","DRE","DD","DXC","ETFC","EMN","ETN","EBAY","ECL","EIX","EW","EA","EMR","ETR","EOG","EFX","EQIX","EQR","ESS","EL","EVRG","ES","RE","EXC","EXPE","EXPD","EXR","XOM","FFIV","FB","FAST","FRT","FDX","FIS","FITB","FE","FRC","FISV","FLT","FLIR","FLS","FMC","F","FTNT","FTV","FBHS","FOXA","FOX","BEN","FCX","GPS","GRMN","IT","GD","GE","GIS","GM","GPC","GILD","GL","GPN","GS","GWW","HRB","HAL","HBI","HIG","HAS","HCA","PEAK","HSIC","HSY","HES","HPE","HLT","HFC","HOLX","HD","HON","HRL","HST","HPQ","HUM","HBAN","HII","IEX","IDXX","INFO","ITW","ILMN","INCY","IR","INTC","ICE","IBM","IP","IPG","IFF","INTU","ISRG","IVZ","IPGP","IQV","IRM","JKHY","J","JBHT","SJM","JNJ","JCI","JPM","JNPR","KSU","K","KEY","KEYS","KMB","KIM","KMI","KLAC","KSS","KHC","KR","LB","LHX","LH","LRCX","LW","LVS","LEG","LDOS","LEN","LLY","LNC","LIN","LYV","LKQ","LMT","L","LOW","LYB","MTB","MRO","MPC","MKTX","MAR","MMC","MLM","MAS","MA","MKC","MXIM","MCD","MCK","MDT","MRK","MET","MTD","MGM","MCHP","MU","MSFT","MAA","MHK","TAP","MDLZ","MNST","MCO","MS","MOS","MSI","MSCI","MYL","NDAQ","NOV","NTAP","NFLX","NWL","NEM","NWSA","NWS","NEE","NLSN","NKE","NI","NBL","NSC","NTRS","NOC","NLOK","NCLH","NRG","NUE","NVDA","NVR","ORLY","OXY","ODFL","OMC","OKE","ORCL","PCAR","PKG","PH","PAYX","PAYC","PYPL","PNR","PBCT","PEP","PKI","PRGO","PFE","PM","PSX","PNW","PXD","PNC","PPG","PPL","PFG","PG","PGR","PLD","PRU","PEG","PSA","PHM","PVH","QRVO","PWR","QCOM","DGX","RL","RJF","RTX","O","REG","REGN","RF","RSG","RMD","RHI","ROK","ROL","ROP","ROST","RCL","SPGI","CRM","SBAC","SLB","STX","SEE","SRE","NOW","SHW","SPG","SWKS","SLG","SNA","SO","LUV","SWK","SBUX","STT","STE","SYK","SIVB","SYF","SNPS","SYY","TMUS","TROW","TTWO","TPR","TGT","TEL","FTI","TDY","TFX","TXN","TXT","TMO","TIF","TJX","TSCO","TDG","TRV","TFC","TWTR","TYL","TSN","UDR","ULTA","USB","UAA","UA","UNP","UAL","UNH","UPS","URI","UHS","UNM","VFC","VLO","VAR","VTR","VRSN","VRSK","VZ","VRTX","V","VNO","VMC","WRB","WAB","WMT","WBA","DIS","WM","WAT","WEC","WFC","WELL","WST","WDC","WU","WRK","WY","WHR","WMB","WLTW","WYNN","XEL","XRX","XLNX","XYL","YUM","ZBRA","ZBH","ZION","ZTS"]
    new_test1 = ['AAPL','DIS','TSLA']
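    # Note: hard-coded paper-trading credentials; in practice these should be
    # loaded from the environment (e.g. os.environ) rather than committed to source.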
    api = tradeapi.REST(base_url='https://paper-api.alpaca.markets', key_id='PK82T2NVBWM7DQV85OBF',secret_key='YajdqAKB7iLjsJEFUbI6eGEBXEQ6pa4c5Pnqs9Hb', api_version='v2')
    IEX_TOKEN='pk_bed93d8d2b8d427bb908d273299688ef'


class Storage:
    #Icon = '/Volumes/HP_P500/Trading_Bot/Icon.png'
    Granger_Data1 = '/home/oblanco214/PyServer/lotusfiles/Data1/'
    Granger_Data2 = '/home/oblanco214/PyServer/lotusfiles/Data2/'
    Granger_Data3 = '/home/oblanco214/PyServer/lotusfiles/Data3/'
    #Gephi_Data = "/Volumes/HP_P500/Trading_Bot/Gephi/Data/"
    Granger_Data4 = '/home/oblanco214/PyServer/lotusfiles/Data4/'
    Gephi = '/home/oblanco214/PyServer/lotusfiles/Gephi/'
    Forecast_export1 = '/home/oblanco214/PyServer/lotusfiles/Forecasts/'
    Forecast_export2 = '/home/oblanco214/PyServer/lotusfiles/Forecasts2/'
    Log_export = '/home/oblanco214/PyServer/lotusfiles/Logs/'
    Diagnosed = '/home/oblanco214/PyServer/lotusfiles/Revised/'

class Analysis:
    @staticmethod
    def Pull(ticker_list, export, start, end, forecast_export, interval='1d'):
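        """Download history for each ticker over [start, end) via yfinance, write
        one CSV per ticker to `export`, and record the next session's close in
        `forecast_export`/Actual.csv for later comparison against forecasts."""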
        counter = 0
        name = dt.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
        separator = ','
        #print('(YYYY-MM-DD)')
        new_end = (dt.datetime.strptime(end, '%Y-%m-%d') + dt.timedelta(days=1)).strftime('%Y-%m-%d')
        value_list = []
        for x in ticker_list:
            counter = counter + 1
            print(counter)
            try:
                data = yf.download(x,start=start,end=end,interval=interval)
                data.to_csv(export + x +'.csv')
                actual = yf.download(x,start=end,end=new_end,interval=interval)
                if len(actual.Close) > 0:
                    value = actual.Close.iloc[0]
                    value_list.append(value)
                else:
                    value_list.append(float('NaN'))
            except ValueError:
                print("Yahoo Json Error")
                print("Continue Downloads")
        #print(value_list)
        actual_df = pd.DataFrame(index=ticker_list)
        actual_df['Actual'] = value_list
        actual_df.to_csv(forecast_export + 'Actual.csv')
        #log = open(Storage.Log_export+'Analysis/Pull/'+ name +'.txt','w+')
        #log.write(start+'\n')
        #log.write(end+'\n')
        #log.write('Interval: '+ interval + '\n')
        #company_list = separator.join(ticker_list)
        #log.write(company_list+'\n')
        #log.write(export+'\n')
        #log.close()
        return

    @staticmethod
    def ForwardPull(ticker_list, export, period='3mo', interval='1d'):
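        """Download the most recent `period` of history for each ticker via
        yfinance and write one CSV per ticker to `export`."""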
        counter = 0
        for x in ticker_list:
            counter = counter + 1
            print(counter)
            try:
                data = yf.download(x,period=period,interval=interval)
                data.to_csv(export + x +'.csv')
            except ValueError:
                print("Yahoo Json Error")
                print("Continue Downloads")
        return

    @staticmethod
    def IEXPull(ticker_list, export, start, end):
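        """Variant of Pull sourced from IEX Cloud via iexfinance's
        get_historical_data: writes one close-only CSV per ticker plus
        Actual.csv (the next session's close) to `export`."""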
        IEX_TOKEN = Constants.IEX_TOKEN 
        counter = 0
        name = dt.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
        separator = ','
        #print('(YYYY-MM-DD)')
        new_end = (dt.datetime.strptime(end, '%Y-%m-%d') + dt.timedelta(days=1)).strftime('%Y-%m-%d')
        value_list = []
        for x in ticker_list:
            counter = counter + 1
            print(counter)
            try:
                data = get_historical_data(x,start=dt.datetime.strptime(start,'%Y-%m-%d'),end=dt.datetime.strptime(end,'%Y-%m-%d'),output_format='pandas',close_only=True,token=IEX_TOKEN)
                data['Close'] = data.close
                data.to_csv(export + x +'.csv')
                actual = get_historical_data(x,start=dt.datetime.strptime(end,'%Y-%m-%d'),end=dt.datetime.strptime(new_end,'%Y-%m-%d'),output_format='pandas',close_only=True,token=IEX_TOKEN)
                actual['Close'] = actual.close
                if len(actual.Close) > 0:
                    value = actual.Close.iloc[0]
                    #print(value)
                    value_list.append(value)
                else:
                    value_list.append(float('NaN'))
            except ValueError:
                print("Yahoo Json Error")
                print("Continue Downloads")
        #print(value_list)
        actual_df = pd.DataFrame(index=ticker_list)
        actual_df['Actual'] = value_list
        actual_df.to_csv(export + 'Actual.csv')
        #log = open(Storage.Log_export+'Analysis/Pull/'+ name +'.txt','w+')
        #log.write(start+'\n')
        #log.write(end+'\n')
        #log.write('Interval: '+ interval + '\n')
        #company_list = separator.join(ticker_list)
        #log.write(company_list+'\n')
        #log.write(export+'\n')
        #log.close()
        return

    @staticmethod
    def GTest(source, export, pvalue=0.05):
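        """Build a Granger-causality graph over first-differenced closes: an
        edge y -> x requires both the ssr F-test and the ssr chi-square test at
        `pvalue`; surviving pairs are screened for spurious regressions with the
        Granger-Newbold rule of thumb (suspect when R-squared exceeds the
        Durbin-Watson statistic), and each ticker's surviving one-step VAR
        forecasts are averaged into Forecast.csv. A Gephi node list is written
        to Storage.Gephi and the edge list to `export`."""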
        data_list = []
        for filename in os.listdir(source):
            if filename.endswith(".csv"):
                new_name = os.path.splitext(filename)[0]
                data_list.append(new_name)
        Gmatrix = pd.DataFrame(index=data_list, columns=data_list)
        Node_export = pd.DataFrame()
        Node_export['Label'] = Gmatrix.columns
        Node_export.to_csv(Storage.Gephi + 'Node_List.csv')
        Forecast_df = pd.DataFrame()
        Edge_export = pd.DataFrame()
        Edge_X = []
        Edge_Y = []
        Type_List = []
        Convert_list = Gmatrix.columns.tolist()
        F_test_counter = 0
        Chisq_counter = 0
        double = 0
        VAR_count = 0
        Spurious_counter = 0
        for x in Gmatrix.columns:
            Close_1 = pd.read_csv(source + x +'.csv')
            x_future_values = []
            if len(Close_1.Close) < 34:
                print(len(Close_1.Close))
                print('Sample One Is Too Small!')
            else:
                for y in Gmatrix.columns:
                    is_same = 0
                    Close_2 = pd.read_csv(source + y + '.csv')
                    if len(Close_2.Close) < 34:
                        print('Sample Two is too small')
                    else:
                        Close_df = pd.DataFrame()
                        Close_df[x] = Close_1.Close.diff()
                        Close_df[y] = Close_2.Close.diff()
                        # print(x,y)
                        Close_df.dropna(inplace=True)
                        if x != y:
                            res = tsa.grangercausalitytests(Close_df, 1, verbose=False)
                            f_test_p = res[1][0]['ssr_ftest'][1]
                            chi2_test_p = res[1][0]['ssr_chi2test'][1]
                            if chi2_test_p <= pvalue:
                                is_same = is_same + 1
                                Chisq_counter = Chisq_counter + 1
                            if f_test_p <= pvalue:
                                is_same = is_same + 1
                                F_test_counter = F_test_counter + 1
                            if is_same == 2:
                                double = double + 1
                                VAR_count = VAR_count + 1
                                Type_List.append('Directed')
                                Edge_Y.append(Convert_list.index(y))
                                Edge_X.append(Convert_list.index(x))
                                model = VAR(Close_df)
                                results = model.fit()
                                residual_df = results.resid
                                DW = durbin_watson(resids=residual_df)
                                print('------------------------------------')
                                print(DW)
                                residuals = residual_df.iloc[0:, 0] ** 2
                                RSS = residuals.sum()
                                SquaredSums = []
                                for p in Close_df.iloc[0:, 0]:
                                    tmp = p - Close_df.iloc[0:, 0].mean()
                                    SquaredSums.append(tmp ** 2)
                                SST = sum(SquaredSums)
                                R_squared = 1 - (RSS / SST)
                                print(R_squared)
                                print('------------------------------------')
                                if R_squared <= DW[0]:
                                    lag_order = results.k_ar
                                    pred = results.forecast(Close_df.values[-lag_order:], 1)
                                    future_value = pred[0][0] + Close_1.at[len(Close_1.Close) - 1, 'Close']
                                    print(x + ' as caused by ' + y)
                                    print('Not Spurious')
                                    print(future_value)
                                    x_future_values.append(future_value)
                                else:
                                    print("Is Spurious")
                                    Spurious_counter = Spurious_counter + 1
                print(x_future_values)
                if len(x_future_values) >= 1:
                    print(mean(x_future_values))
                    # wrap the scalar in a list: assigning a bare scalar to a
                    # column of an empty DataFrame produces an empty column
                    Forecast_df[x] = [mean(x_future_values)]
        print(F_test_counter)
        print(Chisq_counter)
        print(VAR_count)
        print(Spurious_counter)
        print(Forecast_df.T)
        Forecast_df.T.to_csv(export + 'Forecast.csv')
        Edge_export['Source'] = Edge_Y
        Edge_export['Target'] = Edge_X
        Edge_export['Type'] = Type_List
        Edge_export.to_csv(export + 'Edges.csv')
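
    # A self-contained sketch (not part of the original pipeline) of the
    # spurious-regression screen used in GTest and Coint_Gtest_Var: the
    # Granger-Newbold rule of thumb flags a fit as suspect when its R-squared
    # exceeds the Durbin-Watson statistic of the residuals. Synthetic data,
    # illustrative only; the method name is hypothetical.
    @staticmethod
    def Spurious_Screen_Demo(n=200, seed=0):
        import numpy as np
        rng = np.random.default_rng(seed)
        # two independent random walks in levels
        df = pd.DataFrame({'A': rng.normal(size=n).cumsum(),
                           'B': rng.normal(size=n).cumsum()})
        results = VAR(df).fit()
        resid = results.resid
        DW = durbin_watson(resid)[0]
        # R-squared of the first equation, computed exactly as in GTest
        RSS = (resid.iloc[:, 0] ** 2).sum()
        SST = ((df.iloc[:, 0] - df.iloc[:, 0].mean()) ** 2).sum()
        R_squared = 1 - RSS / SST
        verdict = 'spurious' if R_squared > DW else 'not spurious'
        print('R-squared:', R_squared, 'DW:', DW, '->', verdict)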

    @staticmethod
    def Stochastic_Hist(source, MA=3):
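        """Stub: enumerates the CSVs in `source` and prints their contents; the
        stochastic-oscillator logic (and the MA parameter) is not yet implemented."""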
        new_list = []
        for x in os.listdir(source):
            if x.endswith(".csv"):
                new_list.append(x)
        for x in new_list:
            ticker_name = x.split('.')[0]
            data = pd.read_csv(source+x)
            print(ticker_name)
            print(data)

    @staticmethod
    def Coint_Gtest_Var(source, export, pvalue=0.05, safety=20):
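        """For each ticker pair (each unordered pair visited once): fit a
        two-variable VAR on price levels, check the residuals for stationarity
        with an ADF test (cointegration screen), run Granger causality in both
        orderings at `pvalue`, drop spurious fits via the R-squared vs.
        Durbin-Watson rule, and keep one-step VAR forecasts. Per-ticker means
        of forecasts and R-squared values go to Forecast.csv; last observed
        closes go to Previous.csv. `safety` is the minimum sample size."""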
        name = dt.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
        data_list = []
        #log = open(Storage.Log_export+'Analysis/Coint_Gtest_VAR/'+ name + '.txt','w+')
        #log.write(source+'\n')
        #log.write(export+'\n')
        #log.write('Standard P-Value: ' + str(pvalue) + '\n')
        for filename in os.listdir(source):
            if filename.endswith(".csv"):
                new_name = os.path.splitext(filename)[0]
                data_list.append(new_name)
        separator = ','
        ticker_list_str = separator.join(data_list)
        #log.write(ticker_list_str+'\n')
        Forecast_df = pd.DataFrame(index=data_list,columns=data_list)
        R_Squared_df = pd.DataFrame(index=data_list,columns=data_list)
        non_cointegration_counter = 0
        cointegration_counter = 0
        previous_list = []
        x_counter = 0
        y_counter = 0
        for x in data_list:
            x_counter = x_counter + 1
            Ticker_1 = pd.read_csv(source + x + '.csv').dropna()
            x_future_values = []
            if len(Ticker_1.Close) < safety:
                print('Data for ' + x + ' is incomplete')
                #log.write('Data for ' + x + ' is incomplete\n')
            else:
                for y in data_list:
                    y_counter = y_counter + 1
                    print(x_counter,y_counter)
                    Ticker_2 = pd.read_csv(source + y + '.csv').dropna()
                    if len(Ticker_2.Close) < safety:
                        print('Data for ' + y + ' is incomplete')
                        #log.write('Data for ' + y + ' is incomplete\n')
                    else:
                        Close_df=pd.DataFrame()
                        Close_df[x] = Ticker_1.Close
                        Close_df[y] = Ticker_2.Close
                        if data_list.index(x) > data_list.index(y):
                            #print(x,y)
                            pair = [x,y]
                            ordered_pair = separator.join(pair)
                            #log.write(ordered_pair+'\n')
                            #print(Close_df)
                            Close_df.dropna(inplace=True)
                            #print(Close_df)
                            model = VAR(Close_df)
                            #print('Model Built...')
                            #log.write('Building Model...\n')
                            results = model.fit()
                            #print('Model has been fitted...')
                            #log.write('Model has been fitted...\n')
                            residual_df = results.resid
                            x_resids = residual_df.iloc[0:,0].dropna()
                            stationary_check = adfuller(x_resids)
                            #print(stationary_check)
                            if stationary_check[1] >= pvalue:
                                #print(x + ' is not cointegrated with ' + y)
                                #log.write(x + ' is not cointegrated with ' + y +'\n')
                                non_cointegration_counter = non_cointegration_counter + 1
                            else:
                                Close_df_inv = pd.DataFrame()
                                Close_df_inv[y] = Ticker_2.Close
                                Close_df_inv[x] = Ticker_1.Close
                                Close_df_inv.dropna(inplace=True)
                                #print(x + ' is cointegrated with ' + y)
                                #log.write(x + ' is cointegrated with ' + y + '\n')
                                cointegration_counter = cointegration_counter + 1
                                #print('Building Granger Causality Test...')
                                #log.write('Building Granger Causality Test...\n')
                                res = tsa.grangercausalitytests(Close_df,1,verbose=False)
                                res_inv = tsa.grangercausalitytests(Close_df_inv,1,verbose=False)
                                f_test_p = res[1][0]['ssr_ftest'][1]
                                chi2_test_p = res[1][0]['ssr_chi2test'][1]
                                f_test_p_inv = res_inv[1][0]['ssr_ftest'][1]
                                chi2_test_p_inv = res_inv[1][0]['ssr_chi2test'][1]
                                is_same = 0
                                is_same_inv = 0
                                if chi2_test_p <= pvalue:
                                    is_same = is_same + 1
                                if f_test_p <= pvalue:
                                    is_same = is_same + 1
                                if chi2_test_p_inv <= pvalue:
                                    is_same_inv = is_same_inv + 1
                                if f_test_p_inv <= pvalue:
                                    is_same_inv = is_same_inv + 1
                                if is_same == 2:
                                    DW = durbin_watson(residual_df)
                                    residuals = residual_df.iloc[0:,0]**2
                                    RSS = residuals.sum()
                                    SquaredSums = []
                                    for p in Close_df.iloc[0:,0]:
                                        tmp = p - Close_df.iloc[0:,0].mean()
                                        SquaredSums.append(tmp**2)
                                    SST = sum(SquaredSums)
                                    R_squared = 1 - (RSS/SST)
                                    #print('Checking for Spurious Regressions...')
                                    #log.write('Checking for Spurious Regressions\n')
                                    if R_squared <= DW[0]:
                                        lag_order = results.k_ar
                                        pred = results.forecast(Close_df.values[-lag_order:],1)
                                        future_value = pred[0][0]
                                        #print(future_value)
                                        Forecast_df.at[x,y] = future_value
                                        R_Squared_df.at[x,y] = R_squared
                                if is_same_inv == 2:
                                    model_inv = VAR(Close_df_inv)
                                    results_inv = model_inv.fit()
                                    residual_df_inv = results_inv.resid
                                    DW_inv = durbin_watson(residual_df_inv)
                                    residuals_inv = residual_df_inv.iloc[0:,0]**2
                                    RSS_inv = residuals_inv.sum()
                                    SquaredSums_inv = []
                                    for p in Close_df_inv.iloc[0:,0]:
                                        tmp_inv = p - Close_df_inv.iloc[0:,0].mean()
                                        SquaredSums_inv.append(tmp_inv**2)
                                    SST_inv = sum(SquaredSums_inv)
                                    R_squared_inv = 1 - (RSS_inv/SST_inv)
                                    #print('Checking for Spurious Regressions...')
                                    #log.write('Checking for Spurious Regressions\n')
                                    if R_squared_inv <= DW_inv[0]:
                                        lag_order_inv = results_inv.k_ar
                                        pred_inv = results_inv.forecast(Close_df_inv.values[-lag_order_inv:],1)
                                        future_value_inv = pred_inv[0][0]
                                        #print(future_value_inv)
                                        Forecast_df.at[y,x] = future_value_inv
                                        R_Squared_df.at[y,x] = R_squared_inv
            #print(non_cointegration_counter)
            #print(cointegration_counter)
            if len(Ticker_1.Close) > 0:
                Previous = Ticker_1.Close.iloc[-1]
                previous_list.append(Previous)
            else:
                previous_list.append(float('NaN'))
        Previous_df = pd.DataFrame(index=data_list)
        Previous_df['Previous'] = previous_list
        Previous_df.to_csv(export+'Previous.csv')
        New_df = Forecast_df.T
        New_R_df = R_Squared_df.T
        final_forecast_df = pd.DataFrame(index=data_list)
        Forecast_list = []
        for col in data_list:
            FC = New_df[col].mean()
            Forecast_list.append(FC)
        R_squared_list = []
        for each in data_list:
            Rsq = New_R_df[each].mean()
            R_squared_list.append(Rsq)
        final_forecast_df['Forecast'] = Forecast_list
        final_forecast_df['R-Squared'] = R_squared_list
        #print('Exporting Future Value(s)...')
        #log.write('Exporting Future Value(s)...\n')
        #print(final_forecast_df)
        final_forecast_df.to_csv(export + 'Forecast.csv')
        #log.close()
        return


    @staticmethod
    def CGV_Builder(data_list, corevalue, source, full_list, return_dict, safety=20):
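        """Worker for CGV_multithread: runs the Coint_Gtest_Var pipeline for the
        slice `data_list` of `full_list` and stores its per-ticker forecast
        frame in the shared `return_dict` under the process number `corevalue`."""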
        Forecast_df = pd.DataFrame(index=full_list,columns=full_list)
        R_Squared_df = pd.DataFrame(index=full_list,columns=full_list)
        previous_list = []
        x_counter = 0
        non_cointegration_counter = 0
        cointegration_counter = 0
        for x in data_list:
            y_counter = 0
            x_counter = x_counter + 1
            Ticker_1 = pd.read_csv(source + x + '.csv').dropna()
            x_future_values = []
            if len(Ticker_1.Close) < safety:
                print('Data for ' + x + ' is incomplete')
            else:
                for y in full_list:
                    y_counter = y_counter + 1
                    print(x_counter,y_counter)
                    Ticker_2 = pd.read_csv(source + y + '.csv').dropna()
                    if len(Ticker_2.Close) < safety:
                        print('Data for ' + y + ' is incomplete')
                    elif x == y:
                        Forecast_df.at[x,y] = float('NaN')
                    else:
                        Close_df = pd.DataFrame()
                        Close_df[x] = Ticker_1.Close
                        Close_df[y] = Ticker_2.Close
                        #print(Close_df)
                        # visit each unordered pair exactly once across all workers,
                        # mirroring the single-threaded lower-triangle condition
                        # (the original chunk-offset arithmetic overlapped chunks)
                        if full_list.index(x) > full_list.index(y):
                            pair = [x,y]
                            #print(pair)
                            Close_df.dropna(inplace=True)
                            model = VAR(Close_df)
                            results = model.fit()
                            residual_df = results.resid
                            x_resids = residual_df.iloc[0:,0].dropna()
                            stationary_check = adfuller(x_resids)
                            if stationary_check[1] >= 0.05:
                                non_cointegration_counter = non_cointegration_counter + 1
                            else:
                                Close_df_inv = pd.DataFrame()
                                Close_df_inv[y] = Ticker_2.Close
                                Close_df_inv[x] = Ticker_1.Close
                                Close_df_inv.dropna(inplace=True)
                                cointegration_counter = cointegration_counter + 1
                                res = tsa.grangercausalitytests(Close_df,1,verbose=False)
                                res_inv = tsa.grangercausalitytests(Close_df_inv,1,verbose=False)
                                f_test_p = res[1][0]['ssr_ftest'][1]
                                chi2_test_p = res[1][0]['ssr_chi2test'][1]
                                f_test_p_inv = res_inv[1][0]['ssr_ftest'][1]
                                chi2_test_p_inv = res_inv[1][0]['ssr_chi2test'][1]
                                is_same = 0
                                is_same_inv = 0
                                if chi2_test_p <= 0.05:
                                    is_same = is_same + 1
                                if f_test_p <= 0.05:
                                    is_same = is_same + 1
                                if chi2_test_p_inv <= 0.05:
                                    is_same_inv = is_same_inv + 1
                                if f_test_p_inv <= 0.05:
                                    is_same_inv = is_same_inv + 1
                                if is_same == 2:
                                    DW = durbin_watson(residual_df)
                                    residuals = residual_df.iloc[0:,0]**2
                                    RSS = residuals.sum()
                                    SquaredSums = []
                                    for p in Close_df.iloc[0:,0]:
                                        tmp = p - Close_df.iloc[0:,0].mean()
                                        SquaredSums.append(tmp**2)
                                    SST = sum(SquaredSums)
                                    R_squared = 1 - (RSS/SST)
                                    #print('Checking for Spurious Regressions...')
                                    #log.write('Checking for Spurious Regressions\n')
                                    if R_squared <= DW[0]:
                                        lag_order = results.k_ar
                                        pred = results.forecast(Close_df.values[-lag_order:],1)
                                        future_value = pred[0][0]
                                        #print(future_value)
                                        try:
                                            Forecast_df.at[x,y] = future_value
                                            R_Squared_df.at[x,y] = R_squared
                                        except KeyError:
                                            print('Out of Range')
                                if is_same_inv == 2:
                                    model_inv = VAR(Close_df_inv)
                                    results_inv = model_inv.fit()
                                    residual_df_inv = results_inv.resid
                                    DW_inv = durbin_watson(residual_df_inv)
                                    residuals_inv = residual_df_inv.iloc[0:,0]**2
                                    RSS_inv = residuals_inv.sum()
                                    SquaredSums_inv = []
                                    for p in Close_df_inv.iloc[0:,0]:
                                        tmp_inv = p - Close_df_inv.iloc[0:,0].mean()
                                        SquaredSums_inv.append(tmp_inv**2)
                                    SST_inv = sum(SquaredSums_inv)
                                    R_squared_inv = 1 - (RSS_inv/SST_inv)
                                    #print('Checking for Spurious Regressions...')
                                    #log.write('Checking for Spurious Regressions\n')
                                    if R_squared_inv <= DW_inv[0]:
                                        lag_order_inv = results_inv.k_ar
                                        pred_inv = results_inv.forecast(Close_df_inv.values[-lag_order_inv:],1)
                                        future_value_inv = pred_inv[0][0]
                                        #print(future_value_inv)
                                        try:
                                            Forecast_df.at[y,x] = future_value_inv
                                            R_Squared_df.at[y,x] = R_squared_inv
                                        except KeyError:
                                            print('Out of Range')
            if len(Ticker_1.Close) > 0:
                Previous = Ticker_1.Close.iloc[-1]
                previous_list.append(Previous)
            else:
                previous_list.append(float('NaN'))
        Previous_df = pd.DataFrame(index=data_list)
        Previous_df['Previous'] = previous_list
        #Previous_df.to_csv(export+'Previous.csv')
        New_df = Forecast_df.T
        New_R_df = R_Squared_df.T
        final_forecast_df = pd.DataFrame(index=data_list)
        Forecast_list = []
        for col in data_list:
            FC = New_df[col].mean()
            Forecast_list.append(FC)
        R_squared_list = []
        for each in data_list:
            Rsq = New_R_df[each].mean()
            R_squared_list.append(Rsq)
        final_forecast_df['Forecast'] = Forecast_list
        final_forecast_df['R-Squared'] = R_squared_list
        return_dict[corevalue] = final_forecast_df
        return



    @staticmethod
    def CGV_multithread(source, export, cores=20):
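        """Split the tickers found in `source` into `cores` chunks, run
        CGV_Builder on each chunk in its own process, then concatenate the
        per-chunk forecast frames and write Forecast.csv to `export`."""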
        full_list = []
        manager = mp.Manager()
        return_dict = manager.dict()
        for filename in os.listdir(source):
            if filename.endswith(".csv"):
                new_name = os.path.splitext(filename)[0]
                full_list.append(new_name)
        chunksize = max(1, len(full_list) // cores)
        num = 0
        Processes = []
        for i in range(1, cores + 1):
            num = num + 1
            if i == cores:
                # the final chunk takes the remainder so no tickers are dropped
                chunk = full_list[(i - 1) * chunksize:]
            else:
                chunk = full_list[(i - 1) * chunksize:i * chunksize]
            #print(chunk)
            P = mp.Process(target=Analysis.CGV_Builder, args=[chunk, num, source, full_list, return_dict])
            Processes.append(P)
            Processes.append(P)
        for process in Processes:
            process.start()
        for process in Processes:
            process.join()
        #print(return_dict.values())
        final_forecast_df = pd.DataFrame(columns=['Forecast','R-Squared'])
        for section in return_dict.values():
            final_forecast_df = pd.concat([final_forecast_df,section])
        final_forecast_df.to_csv(export+'Forecast.csv')
        #print(final_forecast_df)
        
        
        #print(Analysis.CGV_Builder(full_list[:25],1,Storage.Granger_Data2,full_list))




#Analysis.CGV_multithread(Storage.Granger_Data2, Storage.Forecast_export1)






#Analysis.Stochastic_Hist(Storage.Granger_Data2)
#Analysis.Pull(ticker_list=Constants.test_new, export=Storage.Granger_Data2, start='2020-01-01', end='2020-06-01', forecast_export=Storage.Forecast_export1)  # start/end shown are illustrative
#Analysis.GTest(Storage.Granger_Data2, Storage.Forecast_export1)
#Analysis.Coint_Gtest_Var(Storage.Granger_Data2, Storage.Forecast_export1)
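
# A minimal end-to-end sketch, kept commented like the calls above. The dates and
# the Forecast_export1/Forecast_export2 destinations are illustrative assumptions:
#if __name__ == '__main__':
#    Analysis.Pull(ticker_list=Constants.new_test1, export=Storage.Granger_Data2,
#                  start='2020-01-01', end='2020-06-01',
#                  forecast_export=Storage.Forecast_export1)
#    Analysis.Coint_Gtest_Var(Storage.Granger_Data2, Storage.Forecast_export1)
#    Analysis.CGV_multithread(Storage.Granger_Data2, Storage.Forecast_export2, cores=4)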
