Skip to content

Commit

Permalink
Merge pull request #1096 from lzjpaul/23-9-14-ms
Browse files Browse the repository at this point in the history
Training process for MLP models with varying layer sizes
  • Loading branch information
chrishkchris authored Sep 14, 2023
2 parents 7154dad + 369f0f2 commit cc741f5
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 17 deletions.
42 changes: 25 additions & 17 deletions examples/model_selection_psql/ms_mlp/train_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,17 @@ def __call__(self, loss):
return pn_p_g_list

def call_with_returns(self, loss):
    """Backpropagate *loss*, apply the optimizer update to each parameter,
    and collect the results.

    Args:
        loss: scalar loss tensor produced by the forward pass.

    Returns:
        list of (param_name, param_tensor, grad_tensor) tuples, one entry
        per parameter visited by the backward pass.
    """
    pn_p_g_list = []
    for p, g in autograd.backward(loss):
        # Parameters created without an explicit name get a unique
        # id-based one so self.apply can track per-parameter state.
        if p.name is None:
            p.name = id(p)
        self.apply(p.name, p, g)
        # BUG FIX: list.append takes exactly one argument; the original
        # `append(p.name, p, g)` raises TypeError at runtime. Collect the
        # triple as a tuple instead.
        pn_p_g_list.append((p.name, p, g))
    return pn_p_g_list

class MSSGD(MSOptimizer):
Expand Down Expand Up @@ -549,15 +549,23 @@ def run(global_rank,

args = parser.parse_args()

# Grid search over hidden-layer sizes for a 4-hidden-layer MLP.
DEFAULT_LAYER_CHOICES_4 = [8, 16, 24, 32]
for layer1 in DEFAULT_LAYER_CHOICES_4:
    for layer2 in DEFAULT_LAYER_CHOICES_4:
        for layer3 in DEFAULT_LAYER_CHOICES_4:
            for layer4 in DEFAULT_LAYER_CHOICES_4:
                # NOTE(review): the +1/+2/+3 offsets make each layer draw
                # from shifted candidate sizes (e.g. 8/9/10/11) — confirm
                # this is intended rather than using the raw choices.
                layer_hidden_list = [layer1, layer2 + 1, layer3 + 2, layer4 + 3]
                # Build a fresh optimizer per configuration so momentum and
                # weight-decay state do not leak across runs.
                mssgd = MSSGD(lr=args.lr,
                              momentum=0.9,
                              weight_decay=1e-5,
                              dtype=singa_dtype[args.precision])
                run(0,
                    1,
                    args.device_id,
                    layer_hidden_list,
                    args.max_epoch,
                    args.batch_size,
                    args.model,
                    args.data,
                    mssgd,
                    args.graph,
                    args.verbosity,
                    precision=args.precision)
13 changes: 13 additions & 0 deletions examples/model_selection_psql/msmlp/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,21 @@ def forward(self, inputs):
return y

def train_one_batch(self, x, y, synflow_flag, dist_option, spars):
# print ("in train_one_batch")
out = self.forward(x)
# print ("train_one_batch x.data: \n", x.data)
# print ("train_one_batch y.data: \n", y.data)
# print ("train_one_batch out.data: \n", out.data)
if synflow_flag:
loss = self.sum_error(out)
# print ("sum_error")
else: # normal training
loss = self.softmax_cross_entropy(out, y)

if dist_option == 'plain':
# print ("before pn_p_g_list = self.optimizer(loss)")
pn_p_g_list = self.optimizer(loss)
# print ("after pn_p_g_list = self.optimizer(loss)")
elif dist_option == 'half':
self.optimizer.backward_and_update_half(loss)
elif dist_option == 'partialUpdate':
Expand All @@ -115,7 +122,13 @@ def train_one_batch(self, x, y, synflow_flag, dist_option, spars):
self.optimizer.backward_and_sparse_update(loss,
topK=False,
spars=spars)
# print ("len(pn_p_g_list): \n", len(pn_p_g_list))
# print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
# print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
# print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
# print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
return pn_p_g_list, out, loss
# return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss

def set_optimizer(self, optimizer):
self.optimizer = optimizer
Expand Down

0 comments on commit cc741f5

Please sign in to comment.