AIminify automatically determines the best saving method for your compressed model based on its type and compression techniques applied. The loading method you use depends on how the model was saved.
AIminify follows this hierarchy when saving PyTorch models:
1. Try torch.save() → Load with torch.load()
2. If that fails: torch.jit.trace() → Load with torch.jit.load()
3. If that fails: torch.jit.script() → Load with torch.jit.load()
4. If all fail: Manual export required (e.g., ONNX)
Use torch.load() when:
- Model was saved with torch.save()
- Non-quantized models
- Models that didn't require JIT compilation
- Log message shows: "model can be loaded with torch.load()"
Use torch.jit.load() when:
- Model was saved with torch.jit.trace() or torch.jit.script()
- Most of the time, for both non-quantized and quantized models (the most common case)
- Models that couldn't be saved with standard torch.save()
- Log message shows: "model can be loaded with torch.jit.load()"
import torch
import torchvision.models as models
from aiminify import minify, save_model
# Start from a torchvision ResNet-18 with its default pretrained weights.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Compress with quantization disabled; the second return value is unused here.
compressed_model, _ = minify(
    model,
    quantization=False,
    compression_strength=3,
)

# Save the compressed model; input_shape includes the batch dimension.
save_model(
    compressed_model,
    input_shape=(1, 3, 224, 224),
    path="compressed_model.pt",
)

# This example loads the saved file with torch.jit.load().
loaded_model = torch.jit.load("compressed_model.pt")
loaded_model.eval()

# Run a forward pass on random input without tracking gradients.
with torch.no_grad():
    sample_input = torch.randn(1, 3, 224, 224)
    output = loaded_model(sample_input)
For maximum compatibility, use this robust loading approach:
def load_aiminify_model(model_path):
    """
    Robust function to load AIminify compressed PyTorch models.

    Tries ``torch.load()`` first and falls back to ``torch.jit.load()`` if
    that raises. Both loaders map the model to the CPU.

    Args:
        model_path: Path of the saved model file.

    Returns:
        The object returned by whichever loader succeeded.

    Raises:
        RuntimeError: If both loaders fail. The message contains both error
            texts and the ``torch.jit.load()`` error is chained as the cause.
    """
    # NOTE: torch.load() unpickles the file, so only use it on files you trust.
    # NOTE(review): PyTorch 2.6+ defaults torch.load() to weights_only=True; a
    # fully pickled module may then need weights_only=False — confirm against
    # the PyTorch version in use.
    try:
        model = torch.load(model_path, map_location='cpu')
    except Exception as e:
        print(f"torch.load() failed: {e}")
        try:
            model = torch.jit.load(model_path, map_location='cpu')
        except Exception as e2:
            # Chain the second failure so its traceback is preserved.
            raise RuntimeError(
                f"Failed to load model with both methods. "
                f"torch.load() error: {e}, torch.jit.load() error: {e2}"
            ) from e2
        print("Model loaded successfully with torch.jit.load()")
        return model
    print("Model loaded successfully with torch.load()")
    return model
# Load the saved file with the fallback helper, then put the model in eval mode.
model = load_aiminify_model("compressed_model.pt")
model.eval()
# Example: compression with quantization disabled and fine-tuning enabled.
compressed_model, _ = minify(
model,
compression_strength=3,
quantization=False,
fine_tune=True,
)
save_model(
compressed_model,
input_shape=(1, 3, 224, 224),
path="pruned_model.pt",
)
# This example loads the saved file with torch.jit.load().
loaded_model = torch.jit.load("pruned_model.pt")
# Example: compression with quantization enabled.
# NOTE(review): `train_loader` is not defined in this snippet; presumably a
# user-supplied data loader — confirm.
compressed_model, _ = minify(
model,
compression_strength=3,
quantization=True,
training_generator=train_loader
)
save_model(compressed_model, input_shape=(1, 3, 224, 224), path="quantized_model.pt")
# This example loads the quantized file with torch.jit.load().
loaded_model = torch.jit.load("quantized_model.pt")
AIminify uses different formats based on model type:
- Standard Models: .keras format → Load with tf.keras.models.load_model()
- Quantized Models: .tflite format → Load with tf.lite.Interpreter()
import tensorflow as tf
from aiminify import minify, save_model
# Start from a Keras ResNet50 with ImageNet weights.
model = tf.keras.applications.ResNet50(weights='imagenet')
# Compress with quantization disabled; the second return value is unused here.
compressed_model, _ = minify(
model,
quantization=False,
compression_strength=3,
)
# Save to a .keras file; input_shape is given without a batch dimension here.
save_model(
compressed_model,
path="compressed_model.keras",
input_shape=(224, 224, 3),
)
# Load the .keras file back and run it on a random batch of one image.
loaded_model = tf.keras.models.load_model("compressed_model.keras")
sample_input = tf.random.normal((1, 224, 224, 3))
output = loaded_model(sample_input)
import tensorflow as tf
from aiminify import minify, save_model
# Start from a Keras ResNet50 with ImageNet weights.
model = tf.keras.applications.ResNet50(weights='imagenet')

# Compress with quantization enabled; the second return value is unused here.
compressed_model, _ = minify(
    model,
    quantization=True,
    compression_strength=3,
)

# Save to a .tflite file. The shape must match the 224x224 RGB input used
# below (the original snippet had a 244 typo in the width).
save_model(
    compressed_model,
    path="quantized_model.tflite",
    input_shape=(224, 224, 3),
)

# Load the .tflite file with the TFLite interpreter and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path="quantized_model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# set_tensor() takes a NumPy array, so convert the random tensor first.
sample_input = tf.random.normal((1, 224, 224, 3)).numpy()
interpreter.set_tensor(input_details[0]['index'], sample_input)
interpreter.invoke()
output = interpreter.get_tensor(output_details[0]['index'])
def load_aiminify_tf_model(model_path):
    """
    Load AIminify compressed TensorFlow models based on file extension.

    Args:
        model_path: Path to the saved model, as a string or ``os.PathLike``.

    Returns:
        A ``(model, kind)`` tuple. For ``.keras``/``.h5`` files this is the
        loaded Keras model and ``'keras'``; for ``.tflite`` files it is a
        ``tf.lite.Interpreter`` with tensors allocated and ``'tflite'``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    import os

    # Normalise to str so pathlib.Path inputs work with endswith().
    path = os.fspath(model_path)

    if path.endswith(('.keras', '.h5')):
        model = tf.keras.models.load_model(path)
        print(f"Keras model loaded from {model_path}")
        return model, 'keras'

    if path.endswith('.tflite'):
        interpreter = tf.lite.Interpreter(model_path=path)
        interpreter.allocate_tensors()
        print(f"TFLite model loaded from {model_path}")
        return interpreter, 'tflite'

    raise ValueError(f"Unsupported file format: {model_path}")
model, model_type = load_aiminify_tf_model("compressed_model.keras")

# Define the input here so the snippet is self-contained; both branches below
# expect a TensorFlow tensor.
sample_input = tf.random.normal((1, 224, 224, 3))

if model_type == 'keras':
    # Keras models are called directly on the tensor.
    output = model(sample_input)
elif model_type == 'tflite':
    # Here `model` is the interpreter; it takes NumPy input via set_tensor().
    input_details = model.get_input_details()
    output_details = model.get_output_details()
    model.set_tensor(input_details[0]['index'], sample_input.numpy())
    model.invoke()
    output = model.get_tensor(output_details[0]['index'])
import torch
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from aiminify import minify, save_model
# Load ResNet-18 from the torchvision hub. `weights=` replaces the deprecated
# `pretrained=True`; "IMAGENET1K_V1" is the weight set that flag selected.
model = torch.hub.load(
    'pytorch/vision',
    'resnet18',
    weights='IMAGENET1K_V1',
)
model.eval()

# Convert images to tensors and normalise each channel with mean 0.5, std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training split, downloaded to ./data if not already present.
dataset = CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
)

# Compress with quantization and fine-tuning, passing the loader as
# training_generator.
compressed_model, feedback = minify(
    model,
    compression_strength=3,
    quantization=True,
    training_generator=dataloader,
    fine_tune=True,
    verbose=1,
)

# Save the result; input_shape is one 3x32x32 image including the batch dim.
save_model(
    compressed_model,
    input_shape=(1, 3, 32, 32),
    path="compressed_resnet18.pt",
)
print("Check the log message above to see which loading method to use!")
def load_aiminify_model(model_path):
    """
    Load a saved model with torch.load(), falling back to torch.jit.load().

    Args:
        model_path: Path of the saved model file.

    Returns:
        The object returned by whichever loader succeeded, mapped to the CPU.

    Raises:
        Exception: Whatever torch.jit.load() raises if the fallback also fails.
    """
    try:
        return torch.load(model_path, map_location='cpu')
    except Exception:
        # Catch only ordinary errors: a bare `except:` would also swallow
        # KeyboardInterrupt and SystemExit.
        return torch.jit.load(model_path, map_location='cpu')
# Load the saved model with the fallback helper and put it in eval mode.
loaded_model = load_aiminify_model("compressed_resnet18.pt")
loaded_model.eval()

# Run a forward pass on one random 3x32x32 input without tracking gradients.
with torch.no_grad():
    sample_input = torch.randn(1, 3, 32, 32)
    output = loaded_model(sample_input)

# Take the index of the largest output along dim 1 as the predicted class.
predicted_class = torch.argmax(output, dim=1)
print(f"Predicted class: {predicted_class.item()}")
import tensorflow as tf
from aiminify import minify, save_model
# Start from a Keras MobileNetV2 with ImageNet weights and its classifier head.
model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    weights='imagenet',
    include_top=True,
)

# NOTE(review): sample_data is created but never passed to minify() or used
# below — presumably intended as sample/calibration data; confirm.
sample_data = tf.random.normal((100, 224, 224, 3))

compressed_model, feedback = minify(
    model,
    compression_strength=3,
    quantization=True,
    verbose=1,
)

# Keep the path in one variable so saving and loading cannot drift apart.
model_path = "compressed_mobilenet.tflite"
save_model(compressed_model, path=model_path)

# Define the input before branching so both loading paths can use it.
sample_input = tf.random.normal((1, 224, 224, 3))

# Branch on the saved file's extension. The original called .endswith() on
# the model object, which is not a string.
if model_path.endswith('.tflite'):
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], sample_input.numpy())
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
else:
    loaded_model = tf.keras.models.load_model(model_path)
    output = loaded_model(sample_input)

print(f"Model output shape: {output.shape}")
Issue: torch.load() fails with serialization error
Solution: The model was likely saved with JIT. Use torch.jit.load() instead.
Issue: JIT model gives different results than original
Solution: This is expected for quantized models. Small numerical differences are normal.
Issue: Model file is very large after compression
Solution: Check if quantization was applied. Non-quantized models may not reduce file size significantly.
Issue: .keras file won't load
Solution: Try loading as .h5 format or check TensorFlow version compatibility.
Issue: TFLite model gives errors during inference
Solution: Ensure input data type and shape match the model's expectations. TFLite models often require specific data types.
Issue: Don't know which loading method to use
Solution: Check the console output from save_model(). It explicitly tells you which loading method to use.
Issue: Model performance is significantly degraded
Solution: Try lower compression_strength values or disable quantization for better accuracy retention.
- Check the log message from save_model() - it tells you exactly which loading method to use
- Use the robust loading functions provided above for maximum compatibility
- Test inference after loading to ensure the model works correctly
- Benchmark the original model before compression for comparison
- Tune compression strength - start with lower compression strengths for critical applications
- In production, prefer the explicit loading method shown in logs over robust fallback functions
- PyTorch: Use torch.load() for standard models, torch.jit.load() for quantized/JIT models
- TensorFlow: Use tf.keras.models.load_model() for .keras files, tf.lite.Interpreter() for .tflite files
- Check the log messages from AIminify to know which method to use
- Use the robust loading functions when in doubt