AIFeynman: Attempt 2 partial success
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 29 16:21:00 2021
@author: rajeevgangal
"""
"""
Created on Mon May 24 16:53:40 2021
@author: rajeevgangal@VOIS GBIS
"""
from operator import index
from turtle import shape
from numpy import integer
import pandas as pd
from tables import Cols, Column
import aifeynman
import time
# --- Configuration ----------------------------------------------------------
# Location of the raw Telco customer churn dataset.
INPUT_CSV = "/home/rajeevgangal/myProjects/python/data/Telco-Customer-Churn.csv"
# AI Feynman is given a directory (with trailing slash) and a file name separately.
OUTPUT_DIR = "/home/rajeevgangal/myProjects/python/example_data/"
OUTPUT_FILE = "tdclean.csv"

# Attempt 1 failed to use all columns, so the numeric columns are used as the
# X variables by default.
NUMERIC_COLUMNS = ["tenure", "TotalCharges", "MonthlyCharges"]
# Top categorical features (picked from Kaggle notebooks instead of running an
# ML feature-selection phase here). 'Churn' must stay last: its 'Churn_Yes'
# dummy ends up as the last column of the written file.
CATEGORICAL_COLUMNS = ["gender", "OnlineSecurity", "Partner", "Churn"]


def build_feature_frame(td):
    """Return the all-numeric feature table derived from the raw churn data.

    Parameters
    ----------
    td : pandas.DataFrame
        Raw data containing at least ``NUMERIC_COLUMNS`` and
        ``CATEGORICAL_COLUMNS``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The numeric columns followed by the one-hot encoded categorical
        columns (as floats), without ``Churn_No``; ``Churn_Yes`` is last.

    Raises
    ------
    KeyError
        If a required column is missing, or if 'Churn' has no "No" value
        (so there is no ``Churn_No`` column to drop).
    """
    # Work on a copy: assigning into a slice of `td` is chained assignment and
    # triggers SettingWithCopyWarning.
    numeric = td[NUMERIC_COLUMNS].copy()
    # 'TotalCharges' is read as text; unparsable entries become NaN.
    numeric["TotalCharges"] = pd.to_numeric(numeric["TotalCharges"], errors="coerce")

    # One-hot encoding. No dummy_na columns are generated: the '*_nan' columns
    # were giving errors downstream and were always removed again anyway.
    dummies = pd.get_dummies(
        td[CATEGORICAL_COLUMNS],
        columns=CATEGORICAL_COLUMNS,
        dummy_na=False,
        dtype=float,
    )
    # Drop the unnecessary complementary column; 'Churn_Yes' carries the same
    # information.
    dummies = dummies.drop(columns="Churn_No")

    # Both parts come from `td` and share its index, so a column-wise concat
    # lines the rows up without needing 'customerID' as a join key.
    features = pd.concat([numeric, dummies], axis=1, join="inner")
    # Fill the NaNs produced by the numeric coercion above. Only numeric
    # columns are present at this point, which interpolate() requires.
    return features.interpolate(method="linear", limit_direction="forward")


if __name__ == "__main__":
    td = pd.read_csv(INPUT_CSV)  # load the famous telecom churn dataset
    tdnew = build_feature_frame(td)
    print(tdnew.dtypes)  # cautionary dtype conversion check!
    print(tdnew.head(2))  # just check the new DF

    # AI Feynman reads a space-separated file without index or header row.
    tdnew.to_csv(OUTPUT_DIR + OUTPUT_FILE, sep=" ", header=False, index=False)

    # tolist must be *called*; without the parentheses a bound method rather
    # than a list of names is handed to AI Feynman.
    # NOTE(review): 'OnlineSecurity_No internet service' contains spaces, which
    # may not be usable as a variable name inside AI Feynman - confirm.
    tdvars = tdnew.columns.tolist()

    tic = time.perf_counter()
    print("Starting feynman search ", tic)
    print(tdvars)
    aifeynman.run_aifeynman(
        OUTPUT_DIR,
        OUTPUT_FILE,
        60,
        "14ops.txt",
        polyfit_deg=3,
        NN_epochs=500,
        vars_name=tdvars,
        test_percentage=20,
    )
    toc = time.perf_counter()
    timetaken = (toc - tic) / 3600
    print(" Ran feynman example in ", timetaken, " hrs")
Building Bhashini @MeitY | Speech Technologies | GovTech 2.0 | NLP/AI Innovation | Scrum(P) | AI Research - Ex MoE, Ex CIEI
3 年 How come partial, sir? In the first one, the purpose was almost on a plate.