Added two new parameters to summary: verbose and csv. Verbose shows m… #108

Open · wants to merge 1 commit into master
README.md (55 changes: 54 additions & 1 deletion)
@@ -191,10 +191,63 @@ Estimated Total Size (MB): 0.78
----------------------------------------------------------------
```

#### verbose and CSV

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0
model = Net().to(device)

summary(model, (1, 28, 28), verbose=True, csv=True)
```

```
-----------------------------------------------------------------------------------------
Layer (type) Input Shape Output Shape kernel_size stride padding Param #
=========================================================================================
Conv2d-1 [-1, 1, 28, 28] [-1, 10, 24, 24] (5, 5) (1, 1) (0, 0) 260
Conv2d-2 [-1, 10, 12, 12] [-1, 20, 8, 8] (5, 5) (1, 1) (0, 0) 5,020
Dropout2d-3 [-1, 20, 8, 8] [-1, 20, 8, 8] 0 0 0 0
Linear-4 [-1, 320] [-1, 50] 0 0 0 16,050
Linear-5 [-1, 50] [-1, 10] 0 0 0 510
=========================================================================================
Total params: 21,840
Trainable params: 21,840
Non-trainable params: 0
-----------------------------------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.08
Estimated Total Size (MB): 0.15
-----------------------------------------------------------------------------------------
```

In the verbose table, layers without convolution attributes (e.g. `Linear`, `Dropout2d`) show `0` for kernel_size, stride, and padding. When `csv=True`, everything shown is also saved to two files: `model_layers.csv` (the per-layer table) and `model_params.csv` (the totals and size estimates).
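
Because both files are plain CSV, they can be read back with pandas for further processing. A minimal sketch (the file names come from this PR; which columns appear in `model_layers.csv` depends on whether `verbose` was set):

```python
import pandas as pd

# per-layer table written by summary(..., csv=True); with verbose=True it
# also contains the kernel_size, stride, and padding columns
layers = pd.read_csv("model_layers.csv", index_col="Layer")
print(layers["nb_params"].sum())  # 21840 for the Net above

# single-row table with the aggregate statistics
params = pd.read_csv("model_params.csv", index_col=0)
print(params["Total params"].iloc[0])  # 21840
```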

### References

- The idea for this package sparked from [this PyTorch issue](https://github.com/pytorch/pytorch/issues/2001).
- Thanks to @ncullen93, @HTLife, and @Erick7451.
- For Model Size Estimation @jacobkimmel ([details here](https://github.com/sksq96/pytorch-summary/pull/21))
torchsummary/torchsummary.py (169 changes: 131 additions & 38 deletions)
@@ -1,23 +1,25 @@
import torch
import pandas as pd
import torch.nn as nn
from torch.autograd import Variable

from collections import OrderedDict
import numpy as np


def summary(model, input_size, batch_size=-1, device="cuda", verbose=False, csv=False):

    def register_hook(module):

        def hook(module, input, output):
            # name of the operation (e.g. Conv2d)
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)

            # key of the form "Conv2d-1"
            m_key = "%s-%i" % (class_name, module_idx + 1)
            # each entry of summary is itself an OrderedDict
            summary[m_key] = OrderedDict()
            summary[m_key]["input_shape"] = list(input[0].size())
            summary[m_key]["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
@@ -35,20 +37,39 @@ def hook(module, input, output):
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            summary[m_key]["nb_params"] = params
            # for Conv layers, also record kernel_size, stride, and padding
            if "Conv" in class_name:
                summary[m_key]["kernel_size"] = module.kernel_size
                summary[m_key]["stride"] = module.stride
                summary[m_key]["padding"] = module.padding

        if (
            not isinstance(module, nn.Sequential)
            and not isinstance(module, nn.ModuleList)
            and not (module == model)
        ):
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]

    # create properties
    summary = OrderedDict()
@@ -65,43 +86,115 @@ def hook(module, input, output):
    for h in hooks:
        h.remove()

    if verbose:
        print("-" * 112)
        line_new = "{:>20} {:>20} {:>20} {:>10} {:>10} {:>10} {:>10}".format(
            "Layer (type)", "Input Shape", "Output Shape",
            "kernel_size", "stride", "padding", "Param #")
        print(line_new)
        print("=" * 112)
    else:
        print("----------------------------------------------------------------")
        line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
        print(line_new)
        print("================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    if verbose:
        for layer in summary:
            # non-Conv layers have no kernel/stride/padding; fill with zeros
            if "Conv" not in layer:
                summary[layer]["kernel_size"] = 0
                summary[layer]["stride"] = 0
                summary[layer]["padding"] = 0

            line_new = "{:>20} {:>20} {:>20} {:>10} {:>10} {:>10} {:>10}".format(
                layer,
                str(summary[layer]["input_shape"]),
                str(summary[layer]["output_shape"]),
                "{0}".format(summary[layer]["kernel_size"]),
                "{0}".format(summary[layer]["stride"]),
                "{0}".format(summary[layer]["padding"]),
                "{0:,}".format(summary[layer]["nb_params"]),
            )
            total_params += summary[layer]["nb_params"]
            total_output += np.prod(summary[layer]["output_shape"])
            if "trainable" in summary[layer]:
                if summary[layer]["trainable"] == True:
                    trainable_params += summary[layer]["nb_params"]
            print(line_new)

    else:
        for layer in summary:
            # input_shape, output_shape, trainable, nb_params
            line_new = "{:>20} {:>25} {:>15}".format(
                layer,
                str(summary[layer]["output_shape"]),
                "{0:,}".format(summary[layer]["nb_params"]),
            )
            total_params += summary[layer]["nb_params"]
            total_output += np.prod(summary[layer]["output_shape"])
            if "trainable" in summary[layer]:
                if summary[layer]["trainable"] == True:
                    trainable_params += summary[layer]["nb_params"]
            print(line_new)

    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size
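    # worked example (the Net from the README, batch_size=-1): 21,840 params
    # * 4 bytes / 2**20 ≈ 0.08 MB; the layer outputs total 8,380 numbers,
    # doubled for gradients ≈ 0.06 MB; the 1x28x28 input rounds to 0.00 MB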

print("================================================================")
print("Total params: {0:,}".format(total_params))
print("Trainable params: {0:,}".format(trainable_params))
print("Non-trainable params: {0:,}".format(total_params - trainable_params))
print("----------------------------------------------------------------")
print("Input size (MB): %0.2f" % total_input_size)
print("Forward/backward pass size (MB): %0.2f" % total_output_size)
print("Params size (MB): %0.2f" % total_params_size)
print("Estimated Total Size (MB): %0.2f" % total_size)
print("----------------------------------------------------------------")
if verbose:
print("="*112)
print("Total params: {0:,}".format(total_params))
print("Trainable params: {0:,}".format(trainable_params))
print("Non-trainable params: {0:,}".format(total_params - trainable_params))
print("-"*112)
print("Input size (MB): %0.2f" % total_input_size)
print("Forward/backward pass size (MB): %0.2f" % total_output_size)
print("Params size (MB): %0.2f" % total_params_size)
print("Estimated Total Size (MB): %0.2f" % total_size)
print("-"*112)
else:
print("================================================================")
print("Total params: {0:,}".format(total_params))
print("Trainable params: {0:,}".format(trainable_params))
print("Non-trainable params: {0:,}".format(total_params - trainable_params))
print("----------------------------------------------------------------")
print("Input size (MB): %0.2f" % total_input_size)
print("Forward/backward pass size (MB): %0.2f" % total_output_size)
print("Params size (MB): %0.2f" % total_params_size)
print("Estimated Total Size (MB): %0.2f" % total_size)
print("----------------------------------------------------------------")
# return summary
return total_params, trainable_params

# if csv == True, print a df
if csv:
if verbose:
cols = ["input_shape","output_shape","kernel_size","stride","padding", "nb_params"]
else:
cols = ["output_shape", "nb_params"]
idx = summary.keys()
vals = []
for layer in summary:
dict_vals = [summary[layer][key] if type(summary[layer][key]) != torch.Tensor else summary[layer][key].item() for key in cols]
vals.append(dict_vals)



df = pd.DataFrame(vals, index = idx, columns = cols)
df.index.name = 'Layer'
df.to_csv('model_layers.csv')

# Create a second df with: total params, trainable params, non-trainable params, input size, forward/backward pass, params size, estimated total size
cols = ['Total params', 'Trainable params', 'Non-trainable params', 'Input size (MB)', "Forward/backward pass size (MB):","Params size (MB)", "Estimated Total Size (MB)"]
vals = [total_params.item(), trainable_params.item(), (total_params - trainable_params).item(), total_input_size, total_output_size, total_params_size, total_size]
df2 = pd.DataFrame(vals, index = cols, columns = ['model params']).transpose()
df2.to_csv('model_params.csv')
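
The whole mechanism rests on PyTorch's `register_forward_hook`: a hook receives `(module, input, output)` at call time, so per-layer shapes and Conv attributes can be recorded without touching the model itself. A minimal, self-contained sketch (independent of this PR):

```python
import torch
import torch.nn as nn

records = {}

def hook(module, inputs, output):
    # runs right after module.forward(); inputs is a tuple of input tensors
    name = module.__class__.__name__
    records[name] = {
        "input_shape": list(inputs[0].size()),
        "output_shape": list(output.size()),
    }
    if isinstance(module, nn.Conv2d):
        # Conv layers expose kernel_size/stride/padding as plain attributes
        records[name]["kernel_size"] = module.kernel_size
        records[name]["stride"] = module.stride
        records[name]["padding"] = module.padding

conv = nn.Conv2d(1, 10, kernel_size=5)
handle = conv.register_forward_hook(hook)
conv(torch.rand(2, 1, 28, 28))  # the forward pass fires the hook
handle.remove()
print(records["Conv2d"]["output_shape"])  # [2, 10, 24, 24]
```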