From 4f791902098dac9d514559b5eda07c27095343fa Mon Sep 17 00:00:00 2001 From: Aghagolzadeh Date: Thu, 8 Nov 2018 16:42:11 -0800 Subject: [PATCH 1/6] Update BlockMomentumDistributedLearner.h updated BMUF --> ResetBuffer to accept float16 type --- Source/1BitSGD/BlockMomentumDistributedLearner.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/1BitSGD/BlockMomentumDistributedLearner.h b/Source/1BitSGD/BlockMomentumDistributedLearner.h index 1585bd4c2588..a76dbaf10278 100644 --- a/Source/1BitSGD/BlockMomentumDistributedLearner.h +++ b/Source/1BitSGD/BlockMomentumDistributedLearner.h @@ -524,6 +524,8 @@ namespace CNTK ResetBuffer(i, p); else if (p->GetDataType() == DataType::Float) ResetBuffer(i, p); + else if (p->GetDataType() == DataType::Float16) + ResetBuffer(i, p); else RuntimeError("Unsupported type."); } From 17a02f49e6f91833d82b56060fb462f2d4375604 Mon Sep 17 00:00:00 2001 From: Aghagolzadeh Date: Fri, 9 Nov 2018 09:48:15 -0800 Subject: [PATCH 2/6] Update BlockMomentumDistributedLearner.h --- .../1BitSGD/BlockMomentumDistributedLearner.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Source/1BitSGD/BlockMomentumDistributedLearner.h b/Source/1BitSGD/BlockMomentumDistributedLearner.h index a76dbaf10278..f29713b299fd 100644 --- a/Source/1BitSGD/BlockMomentumDistributedLearner.h +++ b/Source/1BitSGD/BlockMomentumDistributedLearner.h @@ -525,39 +525,39 @@ namespace CNTK else if (p->GetDataType() == DataType::Float) ResetBuffer(i, p); else if (p->GetDataType() == DataType::Float16) - ResetBuffer(i, p); + ResetBuffer(i, p); else RuntimeError("Unsupported type."); } } - template + template void ResetBuffer(size_t index, const NDArrayViewPtr& p) { - auto data = p->GetMatrix(); + auto data = p->GetMatrix(); if (!m_blockLevelSmoothedGradient[index]) { // has not been initialized yet - auto pSmoothedGrad = std::make_shared(AsDataType(), p->Shape(), AsDeviceDescriptor(data->GetDeviceId())); - pSmoothedGrad->SetValue(static_cast(0)); + auto pSmoothedGrad = std::make_shared(AsDataType(), p->Shape(), AsDeviceDescriptor(data->GetDeviceId())); + pSmoothedGrad->SetValue(static_cast(0)); m_blockLevelSmoothedGradient[index] = pSmoothedGrad; } if (!m_prevParameters[index]) { - NDArrayViewPtr newValue = std::make_shared(AsDataType(), p->Shape(), AsDeviceDescriptor(data->GetDeviceId())); - std::shared_ptr> newData = newValue->GetWritableMatrix(); + NDArrayViewPtr newValue = std::make_shared(AsDataType(), p->Shape(), AsDeviceDescriptor(data->GetDeviceId())); + std::shared_ptr> newData = newValue->GetWritableMatrix(); newData->SetValue(*data); m_prevParameters[index] = newValue; } else { - m_prevParameters[index]->GetWritableMatrix()->SetValue(*data); + m_prevParameters[index]->GetWritableMatrix()->SetValue(*data); } if (!m_tempBlockGradient[index]) { - m_tempBlockGradient[index] = std::make_shared(AsDataType(), p->Shape(), AsDeviceDescriptor(data->GetDeviceId())); + m_tempBlockGradient[index] = std::make_shared(AsDataType(), p->Shape(), AsDeviceDescriptor(data->GetDeviceId())); } } From 3e6eef529e17fc6bcfa6f5094b1e7fda5318915e Mon Sep 17 00:00:00 2001 From: Aghagolzadeh Date: Fri, 9 Nov 2018 10:02:55 -0800 Subject: [PATCH 3/6] Update BlockMomentumDistributedLearner.h corrected type for GetMatrix and GetWritableMatrix --- Source/1BitSGD/BlockMomentumDistributedLearner.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/1BitSGD/BlockMomentumDistributedLearner.h b/Source/1BitSGD/BlockMomentumDistributedLearner.h index f29713b299fd..d0366cbac94e 100644 --- a/Source/1BitSGD/BlockMomentumDistributedLearner.h +++ b/Source/1BitSGD/BlockMomentumDistributedLearner.h @@ -534,7 +534,7 @@ namespace CNTK template void ResetBuffer(size_t index, const NDArrayViewPtr& p) { - auto data = p->GetMatrix(); + auto data = p->GetMatrix(); if (!m_blockLevelSmoothedGradient[index]) { // has not been initialized yet @@ -552,7 +552,7 @@ namespace CNTK } else { - m_prevParameters[index]->GetWritableMatrix()->SetValue(*data); + m_prevParameters[index]->GetWritableMatrix()->SetValue(*data); } if (!m_tempBlockGradient[index]) From 54f8f14d023906cb62bd4d34d6a553fc2dd3e62d Mon Sep 17 00:00:00 2001 From: Aghagolzadeh Date: Mon, 12 Nov 2018 11:15:39 -0800 Subject: [PATCH 4/6] Update DistributedCommunicator.cpp added log for fp16 aggregation --- Source/CNTKv2LibraryDll/DistributedCommunicator.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/CNTKv2LibraryDll/DistributedCommunicator.cpp b/Source/CNTKv2LibraryDll/DistributedCommunicator.cpp index f37394aab64b..0cabaef81374 100644 --- a/Source/CNTKv2LibraryDll/DistributedCommunicator.cpp +++ b/Source/CNTKv2LibraryDll/DistributedCommunicator.cpp @@ -14,6 +14,7 @@ #include "GPUDataTransferer.h" #include #include "Utils.h" +#include using namespace Microsoft::MSR::CNTK; @@ -732,6 +733,7 @@ namespace CNTK { if (m_nccl->IsSupported() && !dataOnCPU) { + std::cerr << " NCCL fp16 allreduce" << endl; m_nccl->AllReduce(inputData, outputData, numElements, op); return; From 4a620e70f43661acd6eced230fd9e12b95efb38b Mon Sep 17 00:00:00 2001 From: rpengms <40006668+rpengms@users.noreply.github.com> Date: Fri, 16 Nov 2018 11:29:02 -0800 Subject: [PATCH 5/6] Fix DumpNode crash --- Source/Common/Include/Sequences.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Common/Include/Sequences.h b/Source/Common/Include/Sequences.h index 2571fd6f5499..d6e330e6f63f 100644 --- a/Source/Common/Include/Sequences.h +++ b/Source/Common/Include/Sequences.h @@ -546,7 +546,8 @@ struct MBLayout LogicError("GetColumnIndex: t out of sequence bounds."); if (seq.s > GetNumParallelSequences()) LogicError("GetColumnIndex: seq.s out of sequence bounds."); // can only happen if 'seq' does not come out of our own m_sequences array, which is verboten - ptrdiff_t tIn = (ptrdiff_t)t + seq.tBegin; // shifted time index + //ptrdiff_t tIn = (ptrdiff_t)t + seq.tBegin; // shifted time index + ptrdiff_t tIn = (ptrdiff_t)t + (seq.tBegin > 0 ? seq.tBegin : 0 ); // shifted time index if (tIn < 0 || (size_t)tIn >= GetNumTimeSteps()) LogicError("GetColumnIndex: Attempted to access a time step that is accessing a portion of a sequence that is not included in current minibatch."); // we may encounter this for truncated BPTT size_t col = (size_t)tIn * GetNumParallelSequences() + seq.s; From c298985660fef935320220bfe79c475aac72c923 Mon Sep 17 00:00:00 2001 From: Aghagolzadeh Date: Mon, 19 Nov 2018 12:16:07 -0800 Subject: [PATCH 6/6] Update progress_print.py will add absolute time and date to each log output --- bindings/python/cntk/logging/progress_print.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bindings/python/cntk/logging/progress_print.py b/bindings/python/cntk/logging/progress_print.py index 4d2649fc501c..199e2b9c8f44 100644 --- a/bindings/python/cntk/logging/progress_print.py +++ b/bindings/python/cntk/logging/progress_print.py @@ -220,6 +220,8 @@ def write(self, key, value): def ___logprint(self, logline): if self.log_to_file == None: # to stdout. if distributed, all ranks merge output into stdout + t = time.localtime() + print(str(t.tm_year) + '-' + str(t.tm_mon) + '-' + str(t.tm_mday) + ' ' + str(t.tm_hour) + ':' + str(t.tm_min) + ':' + str(t.tm_sec), end = ' ') print(logline) else: # to named file. if distributed, one file per rank