The softmax function in PyTorch, and several concrete C++ implementations of it, are described below:

What the softmax function does

torch.nn.functional.softmax(input, dim) rescales the elements of the tensor along the dimension dim so that each slice along that dimension lies in (0, 1) and sums to 1; in other words, it normalizes each slice into a probability distribution. (Calling it without dim is deprecated and triggers a warning; always pass dim explicitly.)

Example:

>>> import torch
>>> from torch import nn as nn
>>> input = torch.randn(2, 3, 4)
>>> input
tensor([[[-0.1335,  0.1574, -0.4618, -0.1629],
         [-1.1302, -0.2782,  0.2689,  1.4722],
         [ 1.8547,  3.0593,  1.7146, -0.4395]],

        [[-0.0102,  1.4679,  0.0138,  0.5245],
         [ 2.2345,  2.0089,  2.0074,  0.4197],
         [-1.4187, -0.0887,  0.9257,  0.2516]]])

>>> torch.nn.functional.softmax(input)
tensor([[[0.4692, 0.2124, 0.3833, 0.3346],
         [0.0334, 0.0922, 0.1495, 0.7413],
         [0.9635, 0.9588, 0.6876, 0.3338]],

        [[0.5308, 0.7876, 0.6167, 0.6654],
         [0.9666, 0.9078, 0.8505, 0.2587],
         [0.0365, 0.0412, 0.3124, 0.6662]]])
>>>
>>> torch.nn.functional.softmax(input, dim=0)
tensor([[[0.4692, 0.2124, 0.3833, 0.3346],
         [0.0334, 0.0922, 0.1495, 0.7413],
         [0.9635, 0.9588, 0.6876, 0.3338]],

        [[0.5308, 0.7876, 0.6167, 0.6654],
         [0.9666, 0.9078, 0.8505, 0.2587],
         [0.0365, 0.0412, 0.3124, 0.6662]]])

>>> torch.nn.functional.softmax(input, dim=1)
tensor([[[0.1153, 0.0504, 0.0841, 0.1452],
         [0.0426, 0.0326, 0.1746, 0.7447],
         [0.8421, 0.9171, 0.7413, 0.1101]],

        [[0.0936, 0.3415, 0.0923, 0.3757],
         [0.8835, 0.5866, 0.6779, 0.3383],
         [0.0229, 0.0720, 0.2298, 0.2860]]])

>>> torch.nn.functional.softmax(input, dim=2)
tensor([[[0.2482, 0.3320, 0.1788, 0.2410],
         [0.0479, 0.1122, 0.1939, 0.6460],
         [0.1885, 0.6287, 0.1638, 0.0190]],

        [[0.1232, 0.5403, 0.1262, 0.2103],
         [0.3626, 0.2894, 0.2889, 0.0591],
         [0.0487, 0.1843, 0.5081, 0.2589]]])

A simple way to think about it: a tensor is stored in memory as a nested multi-dimensional array. Only the innermost dimension holds the actual data; from any outer dimension you effectively see pointers to lower-dimensional sub-arrays, and each such pointer is just the address of the first element of its sub-array. Softmax along a given dim is therefore computed over the elements that occupy the same position in each sub-array of that dimension. Let's look at the concrete examples.

>>> torch.nn.functional.softmax(input, dim=0)
tensor([[[0.4692, 0.2124, 0.3833, 0.3346],
         [0.0334, 0.0922, 0.1495, 0.7413],
         [0.9635, 0.9588, 0.6876, 0.3338]],

        [[0.5308, 0.7876, 0.6167, 0.6654],
         [0.9666, 0.9078, 0.8505, 0.2587],
         [0.0365, 0.0412, 0.3124, 0.6662]]])

# dim = 0 is the outermost dimension. Seen from this dimension the tensor only contains pointers to two sub-arrays, so the softmax is computed over the two elements at the same position in those sub-arrays, then moves on to the next position.
# 0.4692 + 0.5308 = 0.2124 + 0.7876 = ... = 1
[   [[0.4692, 0.2124, 0.3833, 0.3346], [0.0334, 0.0922, 0.1495, 0.7413], [0.9635, 0.9588, 0.6876, 0.3338]],
    [[0.5308, 0.7876, 0.6167, 0.6654], [0.9666, 0.9078, 0.8505, 0.2587], [0.0365, 0.0412, 0.3124, 0.6662]]
]
>>> torch.nn.functional.softmax(input, dim=1)
tensor([[[0.1153, 0.0504, 0.0841, 0.1452],
         [0.0426, 0.0326, 0.1746, 0.7447],
         [0.8421, 0.9171, 0.7413, 0.1101]],

        [[0.0936, 0.3415, 0.0923, 0.3757],
         [0.8835, 0.5866, 0.6779, 0.3383],
         [0.0229, 0.0720, 0.2298, 0.2860]]])

# dim = 1: seen from this dimension, each outer entry contains pointers to three sub-arrays, so the softmax is computed over the three elements at the same position in those sub-arrays, then moves on to the next position.
# 0.1153 + 0.0426 + 0.8421 = 0.0936 + 0.8835 + 0.0229 = 0.0504 + 0.0326 + 0.9171 = ... = 1
[   [   [0.1153, 0.0504, 0.0841, 0.1452],
        [0.0426, 0.0326, 0.1746, 0.7447],
        [0.8421, 0.9171, 0.7413, 0.1101]
    ],

    [   [0.0936, 0.3415, 0.0923, 0.3757],
        [0.8835, 0.5866, 0.6779, 0.3383],
        [0.0229, 0.0720, 0.2298, 0.2860]
    ]
]
>>> torch.nn.functional.softmax(input, dim=2)
tensor([[[0.2482, 0.3320, 0.1788, 0.2410],
         [0.0479, 0.1122, 0.1939, 0.6460],
         [0.1885, 0.6287, 0.1638, 0.0190]],

        [[0.1232, 0.5403, 0.1262, 0.2103],
         [0.3626, 0.2894, 0.2889, 0.0591],
         [0.0487, 0.1843, 0.5081, 0.2589]]])

# dim = 2: seen from this dimension, each entry holds four concrete values, so the softmax is computed over those four values, then moves on to the next row.
# 0.2482 + 0.3320 + 0.1788 + 0.2410 = 0.0479 + 0.1122 + 0.1939 + 0.6460 = ... = 1
[   [   [0.2482, 0.3320, 0.1788, 0.2410],
        [0.0479, 0.1122, 0.1939, 0.6460],
        [0.1885, 0.6287, 0.1638, 0.0190]
    ],

    [   [0.1232, 0.5403, 0.1262, 0.2103],
        [0.3626, 0.2894, 0.2889, 0.0591],
        [0.0487, 0.1843, 0.5081, 0.2589]
    ]
]

References: https://blog.csdn.net/weixin_42280069/article/details/103757742 https://www.cnblogs.com/wanghui-garcia/p/10675588.html https://blog.csdn.net/m0_46653437/article/details/111610571

C++ implementations

There are many ways to implement softmax in C++; several of them are shown below.

1. Direct implementation of the definition
Vector y = mlp(x); // output of the neural network without softmax activation function
for(int f = 0; f < y.rows(); f++)
  y(f) = exp(y(f));
y /= y.sum();

This approach has a flaw: exp overflows very easily for large inputs. The standard improvement is to first find the maximum value and subtract it from every element, so that all exponents become non-positive and exp can no longer overflow. Subtracting a constant does not change the result, since exp(x_i - c) / sum_j exp(x_j - c) = exp(x_i) / sum_j exp(x_j). Something like this:

Vector y = mlp(x); // output of the neural network without softmax activation function
double ymax = maximal component of y
for(int f = 0; f < y.rows(); f++)
  y(f) = exp(y(f) - ymax);
y /= y.sum();

References: https://stackoverflow.com/questions/9906136/implementation-of-a-softmax-activation-function-for-neural-networks https://lingpipe-blog.com/2009/06/25/log-sum-of-exponentials/
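The pseudocode above becomes concrete once the Vector type is pinned down. Below is a minimal sketch assuming Eigen's VectorXd (mlp(x) stands for whatever produces the raw network outputs, so the sketch simply takes that vector as an argument):

#include <Eigen/Dense>

// Numerically stable softmax matching the pseudocode above.
// Assumption: the "Vector" type is Eigen's VectorXd; y holds the raw network outputs.
Eigen::VectorXd stable_softmax(Eigen::VectorXd y)
{
    const double ymax = y.maxCoeff();        // maximal component of y
    y.array() = (y.array() - ymax).exp();    // every exponent is <= 0, so exp cannot overflow
    y /= y.sum();                            // normalize so the entries sum to 1
    return y;
}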

2.
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

double myfunction(double num) {
    return std::exp(num);
}

template <typename T>
void softmax(const std::vector<T> &v, std::vector<T> &s) {
    double sum = 0.0;
    s.resize(v.size());  // the output vector must be as large as the input before transform writes into it
    std::transform(v.begin(), v.end(), s.begin(), myfunction);
    sum = std::accumulate(s.begin(), s.end(), sum);
    for (size_t i = 0; i < s.size(); ++i)
        s.at(i) /= sum;
}

This is a concrete C++ version of the basic approach above, and it still carries the overflow risk. Reference: https://www.cnblogs.com/donggongdechen/p/11049648.html
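To see that risk concretely: for double, std::exp overflows to infinity once its argument exceeds roughly 709, so large logits turn the whole result into nan. A minimal sketch, assuming the softmax(v, s) template above is in scope:

#include <cstdio>
#include <vector>

// assumes the softmax(v, s) template from the snippet above is visible here
int main() {
    std::vector<double> v{1000.0, 1001.0, 1002.0};  // large logits
    std::vector<double> s;
    softmax(v, s);
    // exp(1000) overflows to inf, and inf / inf is nan, so every printed value is nan
    for (double x : s) std::printf("%f ", x);
    std::printf("\n");
    return 0;
}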

3.
// ========================= Activation Function: softmax =====================
template<typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length)
{
    const _Tp alpha = *std::max_element(src, src + length);
    _Tp denominator{ 0 };

    for (int i = 0; i < length; ++i) {
        dst[i] = std::exp(src[i] - alpha);
        denominator += dst[i];
    }

    for (int i = 0; i < length; ++i) {
        dst[i] /= denominator;
    }

    return 0;
}

This version first takes the maximum, then exponentiates the non-positive differences. References: https://blog.csdn.net/fengbingchun/article/details/75220591 https://github.com/fengbingchun/NN_Test
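A quick usage sketch of activation_function_softmax above. Thanks to the max subtraction, even large inputs stay finite (the printed values are approximate):

#include <cstdio>

// assumes activation_function_softmax from the snippet above is visible here
int main() {
    const float src[4] = {1000.0f, 1001.0f, 1002.0f, 1003.0f};
    float dst[4] = {0.0f};
    activation_function_softmax(src, dst, 4);
    // after subtracting the maximum every exponent is <= 0, so nothing overflows;
    // prints approximately: 0.032059 0.087144 0.236883 0.643914
    for (float x : dst) std::printf("%f ", x);
    std::printf("\n");
    return 0;
}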

4.
template <typename It>
void softmax (It beg, It end)
{
  using VType = typename std::iterator_traits<It>::value_type;

  static_assert(std::is_floating_point<VType>::value,
                "Softmax function only applicable for floating types");

  auto max_ele { *std::max_element(beg, end) };

  std::transform(
      beg,
      end,
      beg,
      [&](VType x){ return std::exp(x - max_ele); });

  VType exptot = std::accumulate(beg, end, 0.0);

  std::transform(
      beg,
      end,
      beg,
      std::bind2nd(std::divides<VType>(), exptot));  
}

Similar to no. 3, but it leans more heavily on the STL algorithms. The review thread points out a few places that can be improved further:

// (1) Let the result be written to a separate output range. (Note that a default
//     argument cannot refer to an earlier parameter, so "dest = beg" as written is
//     not legal C++; item 6 below uses an extra overload instead.)
template <typename IterIn, typename IterOut = IterIn>
void softmax (IterIn beg, IterIn end, IterOut dest = beg)

// (2) Make the locals const:
  auto const max_ele { *std::max_element(beg, end) };
  VType const exptot = std::accumulate(beg, end, 0.0);

// (3) Accumulate with the value type instead of the double literal 0.0:
VType const exptot = std::accumulate<IterIn, VType>(beg, end, 0.0);
VType const exptot = std::accumulate(beg, end, VType{});

// (4) Or accumulate the sum inside the first transform, saving one pass over the data:
  VType exptot = 0;

  std::transform(
      beg,
      end,
      beg,
      [&](VType x){ auto ex = std::exp(x - max_ele); exptot += ex; return ex; });

Reference: https://codereview.stackexchange.com/questions/177973/softmax-function-implementation
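One portability note on the code in this section: std::bind2nd was deprecated in C++11 and removed in C++17, so the snippet no longer compiles under newer standards. Below is a sketch of the same in-place function with that call replaced by a lambda (softmax_cxx17 is just a name chosen here to tell it apart from the versions above):

#include <algorithm>
#include <cmath>
#include <iterator>
#include <numeric>
#include <type_traits>

template <typename It>
void softmax_cxx17(It beg, It end)
{
  using VType = typename std::iterator_traits<It>::value_type;

  static_assert(std::is_floating_point<VType>::value,
                "Softmax function only applicable for floating types");

  auto const max_ele { *std::max_element(beg, end) };

  // exponentiate the shifted values in place
  std::transform(beg, end, beg,
                 [max_ele](VType x){ return std::exp(x - max_ele); });

  // accumulate with the value type itself rather than the double literal 0.0
  VType const exptot = std::accumulate(beg, end, VType{});

  // normalize; a lambda replaces the removed std::bind2nd(std::divides<VType>(), exptot)
  std::transform(beg, end, beg,
                 [exptot](VType x){ return x / exptot; });
}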

5.
#include <assert.h>
#include <math.h>

static void softmax(float *input, int input_len)
{
    assert (input != NULL);
    assert (input_len != 0);
    int i;
    float m;
    /* Find maximum value from input array */
    m = input[0];
    for (i = 1; i < input_len; i++) {
        if (input[i] > m) {
            m = input[i];
        }
    }

    float sum = 0;
    for (i = 0; i < input_len; i++) {
        sum += expf(input[i]-m);
    }

    for (i = 0; i < input_len; i++) {
        input[i] = expf(input[i] - m - log(sum));
    }
}

/* A tidied-up version of the same routine: */
static void softmax(float *input, size_t input_len) {
  assert(input);
  // assert(input_len >= 0);  Not needed

  float m = -INFINITY;
  for (size_t i = 0; i < input_len; i++) {
    if (input[i] > m) {
      m = input[i];
    }
  }

  float sum = 0.0;
  for (size_t i = 0; i < input_len; i++) {
    sum += expf(input[i] - m);
  }

  float offset = m + logf(sum);
  for (size_t i = 0; i < input_len; i++) {
    input[i] = expf(input[i] - offset);
  }
}

Both versions above use the log form, computing exp(x - (m + log(sum))) instead of dividing by the sum afterwards. So does the one below:

void softmax(double* input, size_t size) {

    /* the original asserted 0 <= size <= sizeof(input) / sizeof(double), but sizeof(input)
       only measures the pointer, so that check is meaningless; just check the pointer */
    assert(input != NULL);

    size_t i;
    double m, sum, constant;

    m = -INFINITY;
    for (i = 0; i < size; ++i) {
        if (m < input[i]) {
            m = input[i];
        }
    }

    sum = 0.0;
    for (i = 0; i < size; ++i) {
        sum += exp(input[i] - m);
    }

    constant = m + log(sum);
    for (i = 0; i < size; ++i) {
        input[i] = exp(input[i] - constant);
    }

}

Testing with vector<float32_t> b = {1., 2., 3., 4.}, the results with and without the log form are essentially identical. References: https://codereview.stackexchange.com/questions/180467/implementing-softmax-in-c# https://slaystudy.com/implementation-of-softmax-activation-function-in-c-c/
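As a quick check of that claim, here is a small sketch that runs the softmax(double*, size_t) version above against the direct exp/sum formula on {1, 2, 3, 4} (it assumes that function is in scope; the printed values are approximate):

#include <cmath>
#include <cstdio>

// assumes the softmax(double*, size_t) function above is visible here
int main() {
    double a[4] = {1.0, 2.0, 3.0, 4.0};
    softmax(a, 4);   // log form: exp(x - (m + log(sum)))

    // direct form for comparison: exp(x) / sum(exp(x))
    double b[4] = {1.0, 2.0, 3.0, 4.0};
    double sum = 0.0;
    for (int i = 0; i < 4; ++i) sum += std::exp(b[i]);
    for (int i = 0; i < 4; ++i) b[i] = std::exp(b[i]) / sum;

    // both columns print approximately 0.032059 0.087144 0.236883 0.643914
    for (int i = 0; i < 4; ++i) std::printf("%f %f\n", a[i], b[i]);
    return 0;
}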

6. Usable code, cleaned up after putting the pieces together and testing

#include <algorithm>
#include <cmath>
#include <iterator>

template <typename IterIn, typename IterOut = IterIn>
int softmax(IterIn begin, IterIn end, IterOut dest)
{
    using VType = typename std::iterator_traits<IterIn>::value_type;

    auto const max_ele {*std::max_element(begin, end)};

    // exponentiate into dest and accumulate the sum in the same pass
    VType exptot = VType{};
    std::transform(begin, end, dest,
           [&](VType x) { auto ex = std::exp(x - max_ele); exptot += ex; return ex; });

    // the output range ends std::distance(begin, end) elements past dest
    // (this resolves the old "dest_end ? end" Todo, which broke when IterOut != IterIn)
    IterOut dest_end = std::next(dest, std::distance(begin, end));
    std::transform(dest, dest_end, dest,
           [exptot](VType x) { return x / exptot; });  // std::bind2nd was removed in C++17

    return 0;
}

template <typename IterIn>
int softmax(IterIn begin, IterIn end)
{
    return softmax(begin, end, begin);
}
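A minimal usage sketch of the two overloads above, on std::vector<float> (printed values are approximate):

#include <cstdio>
#include <vector>

// assumes the two softmax templates above are visible here
int main() {
    // in-place, two-iterator overload
    std::vector<float> a{1.0f, 2.0f, 3.0f, 4.0f};
    softmax(a.begin(), a.end());

    // separate-destination overload: read from a const vector, write somewhere else
    const std::vector<float> b{1.0f, 2.0f, 3.0f, 4.0f};
    std::vector<float> out(b.size());
    softmax(b.begin(), b.end(), out.begin());

    // both columns print approximately 0.032059 0.087144 0.236883 0.643914
    for (size_t i = 0; i < a.size(); ++i) std::printf("%f %f\n", a[i], out[i]);
    return 0;
}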

If you want to apply softmax along a specific axis, the way PyTorch's softmax does with dim, you can bring in xtensor and use it roughly like this:

    // scores is organized like: {{{1, 2}, {3, 4}}}
    vector<int> scoresShape = {1, scoresSize / 2, 2};
    auto scoresArray = adapt((float32_t *)scoresData,
                scoresSize,
                no_ownership(), scoresShape);
    cout << scoresArray << endl;

    auto scoresIter = axis_slice_begin(scoresArray, 2u);
    auto scoresEnd = axis_slice_end(scoresArray, 2u);
    while (scoresIter != scoresEnd)
    {
        softmax((*scoresIter).begin(), (*scoresIter).end());
        scoresIter++;
    }

    cout << scoresArray << endl;
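For reference, here is a self-contained sketch of the fragment above, with a concrete 1 x 2 x 2 buffer {1, 2, 3, 4} standing in for scoresData/scoresSize and plain float instead of float32_t. The xtensor calls (xt::adapt, xt::axis_slice_begin/axis_slice_end) are the ones used in the fragment; the header paths follow xtensor's usual layout and may differ between versions:

#include <cstddef>
#include <iostream>
#include <vector>

#include <xtensor/xadapt.hpp>                // xt::adapt, xt::no_ownership (path assumed)
#include <xtensor/xaxis_slice_iterator.hpp>  // xt::axis_slice_begin / xt::axis_slice_end (path assumed)
#include <xtensor/xio.hpp>                   // operator<< for xtensor expressions (path assumed)

// assumes the iterator-based softmax from item 6 above is visible here

int main() {
    // concrete stand-ins for scoresData / scoresSize, organized as {{{1, 2}, {3, 4}}}
    std::vector<float> scoresData{1.0f, 2.0f, 3.0f, 4.0f};
    const std::size_t scoresSize = scoresData.size();

    std::vector<std::size_t> scoresShape = {1, scoresSize / 2, 2};
    auto scoresArray = xt::adapt(scoresData.data(), scoresSize,
                                 xt::no_ownership(), scoresShape);
    std::cout << scoresArray << std::endl;

    // softmax along the last axis, one 1-D slice at a time
    auto scoresIter = xt::axis_slice_begin(scoresArray, 2u);
    auto scoresEnd = xt::axis_slice_end(scoresArray, 2u);
    while (scoresIter != scoresEnd) {
        softmax((*scoresIter).begin(), (*scoresIter).end());
        ++scoresIter;
    }

    // each length-2 slice becomes approximately {0.268941, 0.731059}
    std::cout << scoresArray << std::endl;
    return 0;
}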

Tags: math
