pytorch 参数初始化

时间：2020-11-04 18:26:34 阅读：13 评论：0 收藏：0 [点我收藏+]

标签：data int 基类 stride ace 利用 mode rac super

利用pytorch 定义自己的网络模型时，需要继承torch.nn.Module 基类。

基类中有parameters()、modules()、children()等方法

import math

import torch
import torch.nn as nn

class myModel(nn.Module):
    """Small convolutional classifier used to demonstrate module traversal.

    Two Conv-BatchNorm-ReLU stages are followed by a plain 3x3 convolution,
    2x2 average pooling and a linear classifier.  The classifier expects
    5 * 5 * 128 input features; since the padded 3x3 convolutions keep the
    spatial size and the pooling halves it, this matches e.g. 3x10x10 inputs.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        def conv_bn_relu(in_channels, out_channels):
            # One Conv -> BatchNorm -> in-place ReLU stage.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )

        # Attribute names and creation order determine what parameters(),
        # modules() and children() report, so they are kept stable.
        self.conv1 = conv_bn_relu(3, 64)
        self.conv2 = conv_bn_relu(64, 128)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.avgpool = nn.AvgPool2d(2)
        self.fc = nn.Linear(5 * 5 * 128, num_classes)

    def forward(self, x):
        """Run the network on a batch and return the classifier output."""
        features = self.conv3(self.conv2(self.conv1(x)))
        pooled = self.avgpool(features)
        # Collapse everything except the batch dimension for the linear layer.
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc(flattened)

看一下parameters方法

# Build the demo network with 100 output classes.
mymodel = myModel(100)


# parameters() yields every learnable tensor of the model, recursively.
for m in mymodel.parameters():
    print('---------------')
    # The name attribute prints as None here (see the output below);
    # named_parameters() is the API that yields dotted parameter names.
    print(m.name, m.shape)

>>>---------------
None torch.Size([64, 3, 3, 3])
---------------
None torch.Size([64])
---------------
None torch.Size([64])
---------------
None torch.Size([64])
---------------
None torch.Size([128, 64, 3, 3])
---------------
None torch.Size([128])
---------------
None torch.Size([128])


list(mymodel.parameters())
>>>[Parameter containing:
 tensor([[[[ 0.1143,  0.1445,  0.0634],
           [-0.1294, -0.1618,  0.0916],
           [-0.1492, -0.0222,  0.1498]],
 
          [[-0.1576, -0.0599,  0.0668],
           [ 0.0777,  0.1712, -0.1479],
           [-0.0921, -0.0166, -0.1750]],

看一下modules()方法

# modules() walks the model itself and then every nested sub-module.
for m in mymodel.modules():
    print('---------------')
    print(m)


---------------
myModel(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc): Linear(in_features=3200, out_features=100, bias=True)
)
---------------
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
---------------
ReLU(inplace=True)
---------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
---------------
ReLU(inplace=True)
---------------
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
AvgPool2d(kernel_size=2, stride=2, padding=0)
---------------
Linear(in_features=3200, out_features=100, bias=True)

看一下children()方法

# children() yields only the direct sub-modules of the model.
for m in mymodel.children():
    print('---------------')
    print(m)


---------------
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
)
---------------
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
---------------
AvgPool2d(kernel_size=2, stride=2, padding=0)
---------------
Linear(in_features=3200, out_features=100, bias=True)

比较一下children() 方法和 modules() 方法

model.modules()会递归遍历model自身及其所有子层（包括嵌套在Sequential中的子层），而model.children()仅会遍历model的直接子层，如上所示

所以在进行参数初始化的时候，需要运用self.modules() 【类内初始化】或者model.modules()【类外初始化】，这样可以保证初始化所有的参数

初始化w ： weight.data.具体方式（normal_、fill_(1)、zero_()）

初始化b ： bias.data.具体方式（normal_、fill_(1)、zero_()）

# Manual initialization loop; it uses ``self`` and is therefore meant to be
# placed inside the model class (e.g. at the end of __init__).
for m in self.modules():
    if isinstance(m, nn.Conv2d):
        # n = kernel area * number of output channels; weights are drawn
        # from a zero-mean normal with standard deviation sqrt(2 / n).
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
    elif isinstance(m, nn.BatchNorm2d):
        # Start batch norm with scale 1 and shift 0.
        m.weight.data.fill_(1)
        m.bias.data.zero_()

nn.init.kaiming_normal_

def initialize_weights(*models):
    """Initialize the weights of every given model in place.

    Each model is traversed with ``modules()`` so nested layers are reached:

    * ``nn.Conv2d``: weight filled by ``nn.init.kaiming_normal_`` with
      ``nonlinearity='relu'`` (the bias is left untouched).
    * ``nn.BatchNorm2d``: weight set to 1, bias set to 1e-4.
    * ``nn.Linear``: weight drawn from a normal with mean 0 and standard
      deviation 1e-4, bias set to 0.

    Args:
        *models: Any number of ``nn.Module`` instances; passing none is a
            no-op.

    Returns:
        None. The modules are modified in place.
    """
    for model in models:
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1.)
                m.bias.data.fill_(1e-4)
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0.0, 0.0001)
                m.bias.data.zero_()

还有一个常用的方法来设置参数是否需要反向传播

# parameters() returns a generator, so requires_grad has to be set on each
# parameter it yields; assigning the attribute on the generator itself fails
# and would not freeze anything.
for param in model.parameters():
    param.requires_grad = False

其他获取模型信息方法

 mymodel.fc
>>>Linear(in_features=3200, out_features=100, bias=True)

 mymodel.fc.in_features
>>>3200

mymodel.conv3.in_channels
>>>128

pytorch 参数初始化

标签：data int 基类 stride ace 利用 mode rac super

原文地址：https://www.cnblogs.com/learningcaiji/p/13922867.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行