Innovate ML Model store with MongoDB as a Service
Lakshminarasimhan S.
ML model store automation is straightforward once we know how to store and retrieve the trained models.
StatsModels, Scikit-Learn, TensorFlow, Keras, PyTorch, PyTorch Lightning, and Ktrain are the packages that provide different file handles to save and load models as file streams.
Let us visit them one by one.
StatsModels
# save the model
import statsmodels.api as sm
model = sm.tsa.ARIMA([1, 5, 9, 12], order=(1, 0, 1))
results = model.fit()
results.save(model_file_path)
# load the model (statsmodels.tsa.arima_model was removed; use statsmodels.tsa.arima.model)
from statsmodels.tsa.arima.model import ARIMAResults
loaded = ARIMAResults.load(model_file_path)
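As a quick sanity check, the reloaded results object can be used directly, for example to forecast ahead (a minimal sketch; forecast is part of the ARIMAResults API):
# forecast the next two steps from the reloaded results
print(loaded.forecast(steps=2))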
Scikit-Learn Models
Using Pickle to store and retrieve the models
from sklearn.linear_model import LogisticRegression
import pickle
model = LogisticRegression()
model.fit(xtrain, ytrain)
# save the model
with open(model_file_path, 'wb') as f:
    pickle.dump(model, f)
# load the model
with open(model_file_path, 'rb') as f:
    model = pickle.load(f)
result_val = model.score(xval, yval)
result_test = model.score(xtest, ytest)
Using Joblib to store and retrieve the models
from sklearn.linear_model import LogisticRegression
import joblib  # sklearn.externals.joblib was removed; use the standalone joblib package
model = LogisticRegression()
model.fit(xtrain, ytrain)
# save the model to disk
joblib.dump(model, model_file_path)
# load the model saved to disk
model = joblib.load(model_file_path)
result_val = model.score(xval, yval)
result_test = model.score(xtest, ytest)
Keras Models
Using a JSON file to store and retrieve the model architecture
from keras.models import Sequential
from keras.layers import Dense
from keras.models import model_from_json
# create the model (add layers and compile before fitting)
model = Sequential()
# fit the model
model.fit(xtrain, ytrain)
# save the architecture to a JSON file (weights are not included)
model_json = model.to_json()
with open(json_file_path, "w") as file:
    file.write(model_json)
# load the JSON file and recreate the model
with open(json_file_path, "r") as file:
    loaded_json = file.read()
loaded_model = model_from_json(loaded_json)
Using an HDF5 file to store and retrieve the model weights.
from keras.models import Sequential
from keras.layers import Dense
# create the model (add layers and compile before fitting)
model = Sequential()
# fit the model
model.fit(xtrain, ytrain)
# save the weights to an HDF5 file
model.save_weights(h5_file)
# load the weights into a model with the same architecture,
# e.g. one recreated from the JSON file above
loaded_model.load_weights(h5_file)
TensorFlow Models
Storing and retrieving the model weights automatically at the end of each epoch using checkpoint (.ckpt) files.
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
# Train the model with the new callback
model.fit(train_images,
          train_labels,
          epochs=10,
          validation_data=(test_images, test_labels),
          callbacks=[cp_callback])  # Pass callback to training
os.listdir(checkpoint_dir)
Load the weights from the checkpoint (.ckpt) file and evaluate.
# Loads the weights
model.load_weights(checkpoint_path)
# Re-evaluate the model
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))
Storing and retrieving the weights manually using checkpoints after training.
# Save the weights
model.save_weights('./checkpoints/my_checkpoint')
# Create a new model instance
model = create_model()
# Restore the weights
model.load_weights('./checkpoints/my_checkpoint')
# Evaluate the model
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))
Storing and retrieving the Entire Model as a SavedModel
# Create and train a new model instance.
model = create_model()
model.fit(train_images, train_labels, epochs=5)
# Save the entire model as a SavedModel.
!mkdir -p saved_model
model.save('saved_model/my_model')
# load the saved model
new_model = tf.keras.models.load_model('saved_model/my_model')
# Check its architecture
new_model.summary()
# evaluate the restored model
loss, acc = new_model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))
print(new_model.predict(test_images).shape)
Saving and Loading the Entire model using Keras HDF5 Format.
# Create and train a new model instance.
model = create_model()
model.fit(train_images, train_labels, epochs=5)
# Save the entire model to a HDF5 file.
# The '.h5' extension indicates that the model should be saved to HDF5.
model.save('my_model.h5')
Loading the HDF5 file using Keras.
# Recreate the exact same model, including its weights and the optimizer
new_model = tf.keras.models.load_model('my_model.h5')
# Show the model architecture
new_model.summary()
Saving and Loading the model using the Saver() class (TensorFlow 1.x; in TensorFlow 2 this API lives under tf.compat.v1)
Below, we save all of the model's variables.
import tensorflow as tf
w1 = tf.Variable(tf.random_normal(shape=[2]), name='w1')
w2 = tf.Variable(tf.random_normal(shape=[5]), name='w2')
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# saving the active model in the tf session
# (global_step=1000 produces the 'my_test_model-1000' checkpoint files used below)
saver.save(sess, 'my_test_model', global_step=1000)
# loading the saved model
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('my_test_model-1000.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('./'))
Restoring the model and retraining it.
import tensorflow as tf

sess = tf.Session()
# First, load the meta graph and restore the weights
saver = tf.train.import_meta_graph('my_test_model-1000.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))

# Now access the saved placeholders and build a feed-dict to feed new data
# (this assumes the saved graph defined placeholders named w1 and w2)
graph = tf.get_default_graph()
w1 = graph.get_tensor_by_name("w1:0")
w2 = graph.get_tensor_by_name("w2:0")
feed_dict = {w1: 13.0, w2: 17.0}

# Access the op that you want to run
op_to_restore = graph.get_tensor_by_name("op_to_restore:0")

print(sess.run(op_to_restore, feed_dict))
# This prints the result calculated from the new values of w1 and w2
# together with the variable values restored from the checkpoint.
Detailed API documentation is available for the Saver() class.
PyTorch
Save and Load the checkpoints.
import torch
import torch.optim as optim
# save a training checkpoint (assumes a trained network `net` and its optimizer)
EPOCH = 5
PATH = "model.pt"
LOSS = 0.4
torch.save({
    'epoch': EPOCH,
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': LOSS,
}, PATH)
# recreate the model and optimizer, then restore their states
model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
model.eval()   # for inference
# - or -
model.train()  # for resuming training
Saving and Loading the Entire PyTorch Model
#save the active model
torch.save(model, PATH)
#load the saved model
model = torch.load(PATH)
model.eval()
Export and Load the PyTorch Model in Torchscript Format
model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save('model_scripted.pt') # Save
model = torch.jit.load('model_scripted.pt') # load
model.eval() # evaluate
Saving and Loading the Model variables using state_dict.
torch.save(model.state_dict(), PATH)
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()
Detailed API documentation is available in the PyTorch docs.
PyTorch Lightning
Automatic storing and retrieving of checkpoints at the end of every training epoch.
Lightning automatically saves a checkpoint for you in your current working directory, with the state of your last training epoch. This makes sure you can resume training in case it was interrupted.
To change the checkpoint path, pass in:
# saves checkpoints to '/your/path/to/save/checkpoints' at every epoch end
trainer = Trainer(default_root_dir="/your/path/to/save/checkpoints")
# loading the saved checkpoints to retrain the model
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
checkpoint_callback = ModelCheckpoint(dirpath="my/path/", save_top_k=2, monitor="val_loss")
trainer = Trainer(callbacks=[checkpoint_callback])
trainer.fit(model)
checkpoint_callback.best_model_path
Manual storing and retrieving of checkpoints at the end of training.
model = MyLightningModule(hparams)
trainer.fit(model)
trainer.save_checkpoint("example.ckpt")
new_model = MyLightningModule.load_from_checkpoint(checkpoint_path="example.ckpt")
Loading the stored checkpoint into the active model.
model = MyLightningModule.load_from_checkpoint(PATH)
print(model.learning_rate)
# prints the learning_rate you used in this checkpoint
model.eval()
y_hat = model(x)
You can customize the behaviour of the checkpoint. Detailed API documentation is available in the Lightning docs.
Ktrain Models
Save the active Model
# let's save the predictor for later use
predictor.save(r'.\run264_16\saved_model_25k')
Load the saved model into the active model.
# reload the predictor
reloaded_predictor = ktrain.load_predictor(r'.\run264_16\saved_model_25k')
Check the predicted results with the reloaded model above.
# make a prediction on the same documents to verify it still works
results = reloaded_predictor.predict(np.array(df1['Text'][0:16]))
How to innovate the Model store using MongoDB?
How to store and retrieve the raw data for training the model in MongoDB?
We use the pymongo package to connect to the MongoDB database, whether a MongoDB Atlas cluster in the cloud or an on-prem server.
pip install pymongo and pip install pymongo[srv] are the two install variants available.
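For reference, the two install commands are:
pip install pymongo
pip install "pymongo[srv]"
The [srv] extra pulls in dnspython, which is required for mongodb+srv:// connection strings such as the Atlas one below.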
Import the package.
from pymongo import MongoClient
import json
# Local server
client = MongoClient("mongodb://user:user123@localhost:27017")
# or cloud server (replace <password> and <cluster> with your Atlas values)
client = MongoClient("mongodb+srv://user:<password>@<cluster>.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
db = client.test
Initialize the database instance.
db = client.get_database('db_analytics')
Initialize the collection (table) instance.
records = db.swot
Delete any existing data and confirm the collection is empty.
records.delete_many({})
records.count_documents({})
Load the new data into the swot collection.
import pandas as pd
data = pd.read_csv(r'C:\Users\sln27\SWOT-Bot-master\train.csv',encoding='latin1')
data_json = json.loads(data.to_json(orient='records'))
records.insert_many(data_json)
Check the loaded record count.
records.count_documents({})
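To retrieve the stored rows back for training, the collection can be read straight into a DataFrame. A minimal sketch using the same records collection (the {'_id': 0} projection drops Mongo's internal id field):
# read all documents back into a pandas DataFrame for training
df_train = pd.DataFrame(list(records.find({}, {'_id': 0})))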
How to store and retrieve the trained models in MongoDB?
Import the package and connect the server.
from pymongo import MongoClient
from bson.binary import Binary
from bson import ObjectId
import pymongo, gridfs
from gridfs import GridFS
# Local server
client = MongoClient("mongodb://user:user123@localhost:27017")
# or cloud server (replace <password> and <cluster> with your Atlas values)
client = MongoClient("mongodb+srv://user:<password>@<cluster>.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
db = client.test
Initialize the database instance.
db = client.get_database('db_analytics')
Initialize the collection (table) instance.
records = db.models
Set the path of the saved model file. This approach is file-type agnostic: whichever save method above was used, refer to the resulting path and file name individually to load each file into MongoDB as a document (binary object).
model_file=r'C:\Users\sln27\SWOT-Bot-master\run264_16\saved_model_25k\tf_model.h5'
with open(model_file, "rb") as f:
    encoded = Binary(f.read())
Save the document into the db.models collection referenced above, then retrieve it back.
# storing the trained model in the mongo collection
records.insert_one({"filename": model_file, "file": encoded, "description": "Keras model"})
records.count_documents({})
# retrieving the model file back to a physical file
data = records.find_one({'description': 'Keras model'})
type(data)
with open(data['filename'], "wb") as f:
    f.write(data['file'])
This writes the file back to the exact path stored in the document's filename field.
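Note that a single BSON document is capped at 16 MB, so larger model files will not fit in one document. The gridfs package imported above covers this case by chunking the file across two collections. A minimal sketch, assuming the same db instance and model_file path as above (the restored file name here is arbitrary):
fs = GridFS(db)
# store: GridFS splits the file into chunks under the fs.files / fs.chunks collections
with open(model_file, "rb") as f:
    file_id = fs.put(f, filename=model_file, description="Keras model")
# retrieve: read the chunks back and rewrite the file locally
with open("tf_model_restored.h5", "wb") as f:  # arbitrary local file name
    f.write(fs.get(file_id).read())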
Hope you have enjoyed reading the post. I will come up with a new topic tomorrow.
Have a wonderful day!
Disclaimer: The author has taken great care to ensure the accuracy of the information. This post is for informational purposes only. Readers may experiment with it at their own risk.