# AIFeynman: Attempt 1
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 24 16:53:40 2021
@author: rajeevgangal @VOIS, GBIS.
"""
from operator import index
from turtle import shape
from numpy import integer
import pandas as pd
from tables import Cols, Column
import aifeynman
import time
# Prepare the Telco customer-churn dataset and run an AI Feynman symbolic
# regression search on it.
#
# Steps: load the CSV, one-hot encode the categorical columns, drop the
# redundant columns, fill numeric gaps, write a space-separated headerless
# file, then call aifeynman.run_aifeynman on that file and report the elapsed
# wall-clock time in hours.

# Input CSV and the working directory/file handed to AI Feynman. The directory
# keeps its trailing slash because it is concatenated with the file name below
# and passed as-is to run_aifeynman.
RAW_CSV = "/home/rajeevgangal/myProjects/python/data/Telco-Customer-Churn.csv"
FEYNMAN_DIR = "/home/rajeevgangal/myProjects/python/example_data/"
FEYNMAN_FILE = "tdclean.csv"

td = pd.read_csv(RAW_CSV)  # read the famous telecom churn dataset
tdpost = td[['Churn']]  # separate response variable (not used further in this script)

# infer_objects() has to be called and its result kept; the bare attribute
# access in the earlier version did nothing.
td = td.infer_objects()

# TotalCharges is read as text; coerce it to numeric, turning unparsable
# entries into NaN so they can be interpolated later.
td['TotalCharges'] = pd.to_numeric(td['TotalCharges'], errors='coerce')

# Remaining object columns are treated as categorical. customerID is excluded
# by name (rather than by position) because it is an identifier, not a feature.
myobjlist = [
    col for col in td.select_dtypes(include='object').columns
    if col != 'customerID'
]

# One-hot encode; dummy_na=True also adds a "<column>_nan" indicator per column.
td = pd.get_dummies(td, columns=myobjlist, dummy_na=True, dtype=float)

# Drop columns with complementary/redundant info: Churn_Yes alone carries the
# response, and customerID is not a feature.
td = td.drop(columns=['Churn_No', 'customerID', 'Churn_nan'])

# Interpolate the remaining missing values (e.g. the coerced TotalCharges NaNs).
td = td.interpolate(method='linear', limit_direction='forward')

# aifeynman apparently needs a space separated file with no index and no
# headers, so write exactly that.
td.to_csv(FEYNMAN_DIR + FEYNMAN_FILE, sep=" ", header=False, index=False)

# tolist() must be called: vars_name needs the list of column names, not the
# bound method object.
tdvars = td.columns.tolist()

tic = time.perf_counter()  # likely to be long running so time it.
print("Starting feynman search ", tic)
# Print which vars are being used; Churn_Yes should be the last column
# (assumes Churn is the last column of the input CSV -- TODO confirm).
print(tdvars)
aifeynman.run_aifeynman(
    FEYNMAN_DIR,
    FEYNMAN_FILE,
    60,
    "14ops.txt",
    polyfit_deg=3,
    NN_epochs=500,
    vars_name=tdvars,
    test_percentage=20,
)
toc = time.perf_counter()
timetaken = (toc - tic) / 3600  # seconds -> hours
print(" Ran feynman example in ", timetaken, " hrs")