Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions .github/workflows/performance-regression-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,25 @@ on:
# - cron: '0 16 * * *'
issue_comment:
types: [created]
pull_request:
branches: [ main ]
paths:
- '.github/workflows/performance-regression-test.yml'

jobs:
performace-regression-test:
runs-on: [Rohan, ubuntu-20.04-lts]
if: github.event.pull_request || github.event.schedule || github.event.issue.pull_request && github.event.comment.body=='APRT'
if: >-
github.event.issue.pull_request
&& github.event.comment.body == 'APRT'
&& (
github.event.comment.author_association == 'MEMBER'
|| github.event.comment.author_association == 'OWNER'
|| github.event.comment.author_association == 'COLLABORATOR'
)
steps:
# trigged by opening a PR which modifies this file or scheduling or commenting 'APRT' in a closed PR
- name: checkout (open pr or schedule or closed pr comments)
if: github.event.pull_request || github.event.schedule || github.event.issue.pull_request.merged_at
- name: checkout (merged pr comments)
if: github.event.issue.pull_request.merged_at
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3

# trigged by commenting 'APRT' in an opening PR
- name: checkout (opening pr comments)
if: github.event.issue.pull_request && !github.event.issue.pull_request.merged_at
- name: checkout (open pr comments)
if: "!github.event.issue.pull_request.merged_at"
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
with:
ref: ${{ format('refs/pull/{0}/merge', github.event.issue.number) }}
Expand Down Expand Up @@ -62,5 +63,5 @@ jobs:
COMMENT_URL: ${{ github.event.comment.html_url }}
JOB_URL: https://github.com/intel-analytics/BigDL/actions/runs/${{ github.run_id }}
ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
IS_PR: ${{ github.event.pull_request != null || github.event.issue.pull_request != null }}
IS_PR: ${{ github.event.issue.pull_request != null }}
IS_COMMENTS: ${{ github.event.issue.pull_request != null }}
75 changes: 53 additions & 22 deletions python/friesian/src/bigdl/friesian/utils/safepickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.
#

import os
import pickle
import hmac
import hashlib
Expand All @@ -24,31 +25,61 @@


class SafePickle:
key = b'shared-key'
"""
Example:
>>> from bigdl.friesian.utils import SafePickle
>>> with open(file_path, 'wb') as file:
>>> signature = SafePickle.dump(data, file, return_digest=True)
>>> with open(file_path, 'rb') as file:
>>> data = SafePickle.load(file, signature)
"""
_key = None

@classmethod
def _get_key(cls):
if cls._key is None:
env_key = os.environ.get('BIGDL_SAFE_PICKLE_KEY')
if env_key:
cls._key = bytes.fromhex(env_key)
else:
cls._key = os.urandom(32)
os.environ['BIGDL_SAFE_PICKLE_KEY'] = cls._key.hex()
return cls._key

@classmethod
def dump(self, obj, file, return_digest=False, *args, **kwargs):
def dump(cls, obj, file, return_digest=False, *args, **kwargs):
"""
Example:
>>> from bigdl.friesian.utils import SafePickle
>>> with open(file_path, 'wb') as file:
>>> SafePickle.dump(data, file)
"""
pickled_data = pickle.dumps(obj)
file.write(pickled_data)
digest = hmac.new(cls._get_key(), pickled_data, hashlib.sha256).hexdigest()
if return_digest:
pickled_data = pickle.dumps(obj)
file.write(pickled_data)
digest = hmac.new(self.key, pickled_data, hashlib.sha1).hexdigest()
return digest
else:
pickle.dump(obj, file, *args, **kwargs)
sig_path = file.name + '.sig'
with open(sig_path, 'w') as sig_file:
sig_file.write(digest)

@classmethod
def load(self, file, digest=None, *args, **kwargs):
def load(cls, file, digest=None, *args, **kwargs):
"""
Example:
>>> from bigdl.friesian.utils import SafePickle
>>> with open(file_path, 'rb') as file:
>>> data = SafePickle.load(file)
"""
content = file.read()

if digest is None:
sig_path = file.name + '.sig'
if os.path.exists(sig_path):
with open(sig_path, 'r') as sig_file:
digest = sig_file.read().strip()

if digest:
content = file.read()
new_digest = hmac.new(self.key, content, hashlib.sha1).hexdigest()
if digest != new_digest:
invalidInputError(False, 'Pickle safe check failed')
file.seek(0)
return pickle.load(file, *args, **kwargs)
new_digest = hmac.new(cls._get_key(), content, hashlib.sha256).hexdigest()
if not hmac.compare_digest(digest, new_digest):
invalidInputError(False,
'Pickle integrity check failed: '
'file may have been tampered with')
else:
invalidInputError(False,
'No HMAC signature found for pickle file: '
'cannot verify integrity')

return pickle.loads(content, *args, **kwargs)
8 changes: 4 additions & 4 deletions python/nano/src/bigdl/nano/deps/horovod/horovod_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@
#

import os
import cloudpickle
import sys
from bigdl.nano.utils.common import SafePickle


if __name__ == '__main__':
temp_dir = sys.argv[1]

with open(os.path.join(temp_dir, "args.pkl"), 'rb') as f:
args = cloudpickle.load(f)
args = SafePickle.load(f)

with open(os.path.join(temp_dir, "target.pkl"), 'rb') as f:
target = cloudpickle.load(f)
target = SafePickle.load(f)

import horovod.tensorflow.keras as hvd
hvd.init()
Expand All @@ -36,4 +36,4 @@

with open(os.path.join(temp_dir,
f"history_{idx}"), "wb") as f:
cloudpickle.dump(history, f)
SafePickle.dump(history, f)
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import cloudpickle
import os
import multiprocessing
import subprocess
Expand All @@ -46,6 +45,7 @@
from bigdl.nano.pytorch.strategies.ddp_spawn import DDPSpawnStrategy, _DDPSpawnLauncher
from bigdl.nano.utils.common import schedule_processors
from bigdl.nano.utils.common import invalidInputError
from bigdl.nano.utils.common import SafePickle
from bigdl.nano.pytorch.dispatcher import _get_patch_status

import logging
Expand Down Expand Up @@ -100,9 +100,9 @@ def launch(self, function: Callable, *args: Any,
# `self._wrapping_function`.
with open(os.path.join(temp_dir, "args.pkl"), "wb") as f:
if trainer is not None:
cloudpickle.dump((None, args, error_queue), f)
SafePickle.dump((None, args, error_queue), f)
else:
cloudpickle.dump((self._wrapping_function, args, error_queue), f)
SafePickle.dump((self._wrapping_function, args, error_queue), f)

# we also need to pass sys.path to subprocess
with open(os.path.join(temp_dir, "sys_path.json"), "w") as f:
Expand Down
4 changes: 2 additions & 2 deletions python/nano/src/bigdl/nano/pytorch/strategies/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
import sys
import json

import cloudpickle
import multiprocessing
from torch.multiprocessing.spawn import _wrap
from bigdl.nano.pytorch.dispatcher import patch_torch
from bigdl.nano.utils.common import SafePickle


if __name__ == '__main__':
Expand All @@ -44,7 +44,7 @@
patch_torch(cuda_to_cpu=patch_status['patch_cuda'])

with open(os.path.join(temp_dir, "args.pkl"), "rb") as f:
(fn, args, error_queue) = cloudpickle.load(f)
(fn, args, error_queue) = SafePickle.load(f)

# args[0] is `trainer`, when it is None, it means when are using LightningLite,
# otherwise we are using trainer, for the details here, see `ddp_subprocess.py`
Expand Down
75 changes: 53 additions & 22 deletions python/nano/src/bigdl/nano/utils/common/safepickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.
#

import os
import pickle
import hmac
import hashlib
Expand All @@ -24,31 +25,61 @@


class SafePickle:
key = b'shared-key'
"""
Example:
>>> from bigdl.nano.utils.common import SafePickle
>>> with open(file_path, 'wb') as file:
>>> signature = SafePickle.dump(data, file, return_digest=True)
>>> with open(file_path, 'rb') as file:
>>> data = SafePickle.load(file, signature)
"""
_key = None

@classmethod
def _get_key(cls):
if cls._key is None:
env_key = os.environ.get('BIGDL_SAFE_PICKLE_KEY')
if env_key:
cls._key = bytes.fromhex(env_key)
else:
cls._key = os.urandom(32)
os.environ['BIGDL_SAFE_PICKLE_KEY'] = cls._key.hex()
return cls._key

@classmethod
def dump(self, obj, file, return_digest=False, *args, **kwargs):
def dump(cls, obj, file, return_digest=False, *args, **kwargs):
"""
Example:
>>> from bigdl.nano.utils.common import SafePickle
>>> with open(file_path, 'wb') as file:
>>> SafePickle.dump(data, file)
"""
pickled_data = pickle.dumps(obj)
file.write(pickled_data)
digest = hmac.new(cls._get_key(), pickled_data, hashlib.sha256).hexdigest()
if return_digest:
pickled_data = pickle.dumps(obj)
file.write(pickled_data)
digest = hmac.new(self.key, pickled_data, hashlib.sha1).hexdigest()
return digest
else:
pickle.dump(obj, file, *args, **kwargs)
sig_path = file.name + '.sig'
with open(sig_path, 'w') as sig_file:
sig_file.write(digest)

@classmethod
def load(self, file, digest=None, *args, **kwargs):
def load(cls, file, digest=None, *args, **kwargs):
"""
Example:
>>> from bigdl.nano.utils.common import SafePickle
>>> with open(file_path, 'rb') as file:
>>> data = SafePickle.load(file)
"""
content = file.read()

if digest is None:
sig_path = file.name + '.sig'
if os.path.exists(sig_path):
with open(sig_path, 'r') as sig_file:
digest = sig_file.read().strip()

if digest:
content = file.read()
new_digest = hmac.new(self.key, content, hashlib.sha1).hexdigest()
if digest != new_digest:
invalidInputError(False, 'Pickle safe check failed')
file.seek(0)
return pickle.load(file, *args, **kwargs)
new_digest = hmac.new(cls._get_key(), content, hashlib.sha256).hexdigest()
if not hmac.compare_digest(digest, new_digest):
invalidInputError(False,
'Pickle integrity check failed: '
'file may have been tampered with')
else:
invalidInputError(False,
'No HMAC signature found for pickle file: '
'cannot verify integrity')

return pickle.loads(content, *args, **kwargs)
8 changes: 4 additions & 4 deletions python/nano/src/bigdl/nano/utils/tf/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ def run(self, target, args=..., nprocs=1, envs=None) -> Any:
return self.run_subprocess(target, args=args, nprocs=nprocs, envs=envs)

def run_subprocess(self, target, args=..., nprocs=1, envs=None) -> Any:
import cloudpickle
from bigdl.nano.utils.common import SafePickle
import subprocess
import sys

with TemporaryDirectory() as temp_dir:
with open(os.path.join(temp_dir, "args.pkl"), 'wb') as f:
cloudpickle.dump(args, f)
SafePickle.dump(args, f)
with open(os.path.join(temp_dir, "target.pkl"), 'wb') as f:
cloudpickle.dump(target, f)
SafePickle.dump(target, f)

ex_list = []
cwd_path = os.path.dirname(__file__)
Expand All @@ -72,5 +72,5 @@ def run_subprocess(self, target, args=..., nprocs=1, envs=None) -> Any:
results = []
for i in range(nprocs):
with open(os.path.join(temp_dir, f"history_{i}"), "rb") as f:
results.append(cloudpickle.load(f))
results.append(SafePickle.load(f))
return results
8 changes: 4 additions & 4 deletions python/nano/src/bigdl/nano/utils/tf/subprocess_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

import json
import os
import cloudpickle
import sys
import tensorflow as tf
from bigdl.nano.utils.common import SafePickle

if __name__ == '__main__':
# Set number of threads in subprocess
Expand All @@ -27,14 +27,14 @@
temp_dir = sys.argv[1]

with open(os.path.join(temp_dir, "args.pkl"), 'rb') as f:
args = cloudpickle.load(f)
args = SafePickle.load(f)

with open(os.path.join(temp_dir, "target.pkl"), 'rb') as f:
target = cloudpickle.load(f)
target = SafePickle.load(f)

history = target(*args)
tf_config = json.loads(os.environ["TF_CONFIG"])

with open(os.path.join(temp_dir,
f"history_{tf_config['task']['index']}"), "wb") as f:
cloudpickle.dump(history, f)
SafePickle.dump(history, f)
Loading
Loading