Churn dataset Image generation code
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 6 23:09:40 2021
@author: Rajee
"""
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 28 23:43:13 2020
@author: Rajeev
"""
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" #force cuda
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pandas as pd
from tensorflow.keras.utils import to_categorical
import numpy as np
from PIL import Image #image generation, saving
import time
# --- Load the churn data and turn every feature into a 0-255 pixel intensity ---
train_csv = pd.read_csv("customer_churn_train.csv", header="infer")

# Columns 1..19 are the candidate features. Take an explicit copy so that the
# edits below act on an independent frame instead of a slice of train_csv
# (in-place edits on a slice raise pandas' chained-assignment warning).
train_x = train_csv.iloc[:, 1:20].copy()
train_y = train_csv.loc[:, "Churn?"]

# Number of target classes; hard-coded, could be inferred from train_y.
num_labels = 2

# Encode the "True."/"False." labels as 1/0. The int cast makes an unexpected
# label fail right here instead of deep inside to_categorical.
train_y = train_y.replace({"True.": "1", "False.": "0"}).astype(int)
# One-hot encode the target; num_classes pins the width to num_labels even if
# one class happens to be absent from the file.
train_y = to_categorical(train_y, num_classes=num_labels)

# Encode yes/no flags the same way the labels are encoded.
train_x = train_x.replace({"yes": "1", "no": "0"})

# Drop the phone number and the two plan columns.
# Add-on exercise: impute missing values or derive features from these columns.
train_x = train_x.drop(columns=["Phone", "Int'l Plan", "VMail Plan"])

(rows, input_size) = train_x.shape  # input layer shape is the number of columns

# Scale each column, take the magnitude, stretch by 256 and clamp to the
# 8-bit pixel range.
# NOTE(review): this subtracts the std and divides by the mean, which is the
# reverse of the usual (x - mean) / std standardisation. Kept unchanged so the
# generated images stay the same - confirm whether this is intentional.
train_x = (train_x - train_x.std()) / train_x.mean()
train_x = train_x.abs() * 256
train_x = train_x.astype(int).clip(lower=0, upper=255)
# --- Render every data row as a square RGB image and save it as a PNG ---
# image array creation params
nrows = train_x.shape[0]  # number of images to generate (one per row)
w = train_x.shape[1]  # number of features is the image width
h = train_x.shape[1]  # keep the image height the same as the width
pix = (h, w, 3)

# train_x holds integers already clipped to 0..255, so uint8 is lossless.
pixel_rows = train_x.to_numpy(dtype=np.uint8)

for i in range(nrows):  # each iteration produces one image
    # Pixel (j, k) is the grey level of feature k for every j and for all
    # three channels, so broadcasting the row over height and channel gives
    # the same array as filling it pixel by pixel. copy() makes the
    # read-only broadcast view a normal contiguous array for PIL.
    myImageArray = np.broadcast_to(
        pixel_rows[i][np.newaxis, :, np.newaxis], pix
    ).copy()
    myI = Image.fromarray(myImageArray, "RGB")
    # One file per row; a single dot before the extension.
    fp = "telecom-churn-row-" + str(i) + ".png"
    myI.save(fp)
Generative AI | Chatbots | Agents | Databricks | Azure | Data Science Consulting | ML Consulting | Python Consulting | End-to-End AI Solutions | Data Science Mentoring | MLOps | Bits Pilani
4 年 Hi Mr. Gangal. Please let me know if I am understanding this correctly: you have an observation with M features and you are trying to predict whether a customer with the given details will churn or not? Essentially, the target is a binary variable. What we have at hand is a structured record, which you are converting into an image so that the CNN can do the prediction (or classification, to be precise). If you have N observations, then N different images are generated, which will then be split into training and testing sets, right?