AIFeynman: Attempt 2 partial success

AIFeynman: Attempt 2 partial success

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

"""

Created on Sat May 29 16:21:00 2021


@author: rajeevgangal

"""


"""

Created on Mon May 24 16:53:40 2021


@author: rajeevgangal@VOIS GBIS

"""


from operator import index

from turtle import shape

from numpy import integer

import pandas as pd

from tables import Cols, Column


import aifeynman

import time


# Load the telecom customer-churn dataset.
td = pd.read_csv("/home/rajeevgangal/myProjects/python/data/Telco-Customer-Churn.csv")

# infer_objects() returns a new frame rather than modifying in place; the
# result has to be called *and* assigned for the dtype inference to take effect.
td = td.infer_objects()

# Attempt 1 failed to use all columns, so only the numerical columns (plus the
# customer key) are used as X variables here.
# .copy() makes td1 an independent frame, so the column assignment below
# writes to td1 itself instead of to a temporary slice of td.
td1 = td[['customerID', 'tenure', 'TotalCharges', 'MonthlyCharges']].copy()

# errors='coerce' turns any entry that cannot be parsed as a number into NaN.
td1['TotalCharges'] = pd.to_numeric(td1['TotalCharges'], errors='coerce')

print(td1.dtypes)  # cautionary dtype conversion check


# Hand-picked categorical features (chosen from Kaggle notebooks on this
# dataset) instead of running an ML feature-selection phase here.
categorical_cols = ['gender', 'OnlineSecurity', 'Partner', 'Churn']
td2 = td[categorical_cols]

# One-hot encode every selected column as float indicators.
# The column labels are supplied as the dummy-name prefixes, and dummy_na=True
# adds an extra "<column>_nan" indicator for each encoded column.
td2 = pd.get_dummies(td2, prefix=td2.columns, dummy_na=True, dtype=float)

# Churn_Yes is kept as the churn indicator; its complementary column is dropped.
td2 = td2.drop(columns='Churn_No')

# Carry the customer key alongside the encoded features.
td2['customerID'] = td1['customerID'].values

print(td2.dtypes)  # check that all dtypes are as expected


## append numeric cols
# Put the numeric columns and the one-hot columns side by side; concat with
# axis=1 aligns the two frames on their row index.
tdnew = pd.concat([td1, td2], axis=1, join="inner")

# AI Feynman doesn't need an index column. drop(columns=...) removes every
# column carrying this label, so a duplicated customerID is handled too.
# This happens before interpolation so that only numeric columns are
# interpolated.
tdnew = tdnew.drop(columns='customerID')

# The "<column>_nan" indicator columns were giving errors downstream, so remove
# every column whose name contains "nan".
nan_cols = [col for col in tdnew.columns if 'nan' in str(col)]
tdnew = tdnew.drop(columns=nan_cols)

# Fill gaps by linear interpolation along the rows.
tdnew = tdnew.interpolate(method='linear', limit_direction='forward')

# Forward-only interpolation cannot fill NaNs at the very top of a column;
# drop any row that is still incomplete so no empty field ends up in the
# space-separated output file.
tdnew = tdnew.dropna()

print(tdnew.head(2))  # just check new DF

# Write a space-separated file without index/headers for AIFeynman.
tdnew.to_csv(
    '/home/rajeevgangal/myProjects/python/example_data/tdclean.csv',
    sep=" ",
    header=False,
    index=False,
)

# Column names in file order. tolist must be *called*: without the parentheses
# a bound method, not a list of names, would be printed and passed as vars_name.
tdvars = tdnew.columns.tolist()

tic = time.perf_counter()
print("Starting feynman search ", tic)
print(tdvars)

# Positional arguments: data directory, data file name, then 60 and "14ops.txt"
# (per the AI Feynman README these are the brute-force time limit and the
# operator-set file — confirm against the installed version).
aifeynman.run_aifeynman(
    "/home/rajeevgangal/myProjects/python/example_data/",
    "tdclean.csv",
    60,
    "14ops.txt",
    polyfit_deg=3,
    NN_epochs=500,
    vars_name=tdvars,
    test_percentage=20,
)

toc = time.perf_counter()

# perf_counter() differences are in seconds; convert to hours for the report.
timetaken = (toc - tic) / 3600
print(" Ran feynman example in ", timetaken, " hrs")




Ajay singh Rajawat

Building Bhashini @MeitY | Speech Technologies | GovTech 2.0 | NLP/AI Innovation | Scrum(P) | AI Research - Ex MoE, Ex CIEI

3 年

How come only partial, sir? In the first attempt the goal was almost served on a plate.

要查看或添加评论,请登录

Rajeev Gangal的更多文章

社区洞察

其他会员也浏览了