mverleg · hmaarrfk · Jan 1, 2024 · Jan 2, 2024 · Jan 3, 2024 · Jan 3, 2024
diff --git a/json_tricks/decoders.py b/json_tricks/decoders.py
@@ -275,8 +275,16 @@ def json_numpy_obj_hook(dct):
 	"""
 	if not isinstance(dct, dict):
 		return dct
-	if not '__ndarray__' in dct:
+	if '__ndarray__' not in dct:
 		return dct
+	if 'shape' not in dct or (dct['shape'] == [] and not dct.get('0dim', False)):
+		# Numpy scalar: no 'shape' key (new style), or an empty shape not marked as '0dim'
+		return _decode_numpy_scalar(dct)
+	else:
+		return _decode_ndarray(dct)
+
+
+def _decode_ndarray(dct):
 	try:
 		import numpy
 	except ImportError:
@@ -297,7 +305,32 @@ def json_numpy_obj_hook(dct):
 		else:
 			return _lists_of_numbers_to_ndarray(data_json, order, shape, nptype)
 	else:
-		return _scalar_to_numpy(data_json, nptype)
+		# This code path is mostly for 0-dimensional arrays
+		# numpy scalars are separately decoded
+		return numpy.asarray(
+			data_json,
+			dtype=nptype
+		).reshape(dct['shape'])
+
+
+def _decode_numpy_scalar(dct):
+	"""
+	Rebuild a numpy scalar from a map with an '__ndarray__' value and a 'dtype' name.
+	"""
+	try:
+		import numpy
+	except ImportError:
+		raise NoNumpyException('Trying to decode a map which appears to represent a numpy '
+			'scalar, but numpy appears not to be installed.')
+
+	# Going through a 0-dimensional array lets numpy.asarray interpret dtypes
+	# that carry units (such as datetime64).
+	zero_dim = numpy.asarray(dct['__ndarray__'], dtype=dct['dtype'])
+
+	# Indexing a zero-dimensional array with an empty tuple returns a scalar,
+	# whereas `zero_dim[...]` would always return a view. See
+	# https://numpy.org/doc/stable/user/basics.indexing.html#detailed-notes
+	return zero_dim[()]
 
 
 def _bin_str_to_ndarray(data, order, shape, np_type_name, data_endianness):
@@ -354,15 +387,6 @@ def _lists_of_obj_to_ndarray(data, order, shape, dtype):
 	return arr
 
 
-def _scalar_to_numpy(data, dtype):
-	"""
-	From scalar value to numpy type.
-	"""
-	import numpy as nptypes
-	dtype = getattr(nptypes, dtype)
-	return dtype(data)
-
-
 def json_nonumpy_obj_hook(dct):
 	"""
 	This hook has no effect except to check if you're trying to decode numpy arrays without support, and give you a useful message.

diff --git a/json_tricks/encoders.py b/json_tricks/encoders.py
@@ -5,6 +5,7 @@
 from fractions import Fraction
 from functools import wraps
 from json import JSONEncoder
+from json.encoder import encode_basestring_ascii, encode_basestring, INFINITY
 import sys
 
 from .utils import hashodict, get_module_name_from_object, NoEnumException, NoPandasException, \
@@ -81,6 +82,54 @@ def default(self, obj, *args, **kwargs):
 					type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders)))
 		return obj
 
+	def iterencode(self, o, _one_shot=False):
+		"""
+		Encode `o` and yield the JSON text chunk by chunk as it becomes available.
+
+		For example::
+
+			for chunk in JSONEncoder().iterencode(bigobject):
+				mysocket.write(chunk)
+
+		The chunks are produced by this module's `_make_iterencode`, with
+		`self.default` as the fallback for values it does not encode directly.
+
+		:param o: The object to encode.
+		:param _one_shot: Passed through unchanged to `_make_iterencode`.
+		:return: A generator of string chunks that together form the JSON text.
+		:raises ValueError: While iterating, for a NaN or infinite float when `allow_nan` is false.
+		"""
+		markers = {} if self.check_circular else None
+		_encoder = encode_basestring_ascii if self.ensure_ascii else encode_basestring
+		allow_nan = self.allow_nan
+
+		def floatstr(value):
+			# Detect the special values by comparison only, so the check does
+			# not depend on processor- or platform-specific float internals.
+			if value != value:
+				text = 'NaN'
+			elif value == INFINITY:
+				text = 'Infinity'
+			elif value == -INFINITY:
+				text = '-Infinity'
+			else:
+				return float.__repr__(value)
+
+			if not allow_nan:
+				raise ValueError(
+					"Out of range float values are not JSON compliant: " +
+					repr(value))
+
+			return text
+
+		# Delegate to the generator function built by `_make_iterencode`; it is
+		# started at indent level 0 for the top-level object.
+		_iterencode = _make_iterencode(
+			markers, self.default, _encoder, self.indent, floatstr,
+			self.key_separator, self.item_separator, self.sort_keys,
+			self.skipkeys, _one_shot)
+		return _iterencode(o, 0)
+
 
 def json_date_time_encode(obj, primitives=False):
 	"""
@@ -375,7 +424,9 @@ def numpy_encode(obj, primitives=False, properties=None):
 
 	:param primitives: If True, arrays are serialized as (nested) lists without meta info.
 	"""
-	from numpy import ndarray, generic
+	from numpy import ndarray, generic, datetime64
+
+	scalar_types = (generic, datetime64)
 
 	if isinstance(obj, ndarray):
 		if primitives:
@@ -407,17 +458,19 @@ def numpy_encode(obj, primitives=False, properties=None):
 				('__ndarray__', data_json),
 				('dtype', str(obj.dtype)),
 				('shape', obj.shape),
+				('0dim', obj.ndim == 0),
 			))
 			if len(obj.shape) > 1:
 				dct['Corder'] = obj.flags['C_CONTIGUOUS']
 			if use_compact and store_endianness != 'suppress':
 				dct['endian'] = store_endianness or sys.byteorder
 			return dct
-	elif isinstance(obj, generic):
-		if NumpyEncoder.SHOW_SCALAR_WARNING:
-			NumpyEncoder.SHOW_SCALAR_WARNING = False
-			warnings.warn('json-tricks: numpy scalar serialization is experimental and may work differently in future versions')
-		return obj.item()
+	elif isinstance(obj, scalar_types):
+		return hashodict((
+			('__ndarray__', obj.item()),
+			('dtype', str(obj.dtype)),
+			('0dim', False),
+		))
 	return obj
 
 
@@ -476,3 +529,196 @@ def default(self, obj, *args, **kwargs):
 		warnings.warn('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`', JsonTricksDeprecation)
 		obj = nonumpy_encode(obj)
 		return super(NoNumpyEncoder, self).default(obj, *args, **kwargs)
+
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
+		_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+		## HACK: hand-optimized bytecode; turn globals into locals
+		ValueError=ValueError,
+		dict=dict,
+		float=float,
+		id=id,
+		int=int,
+		isinstance=isinstance,
+		list=list,
+		str=str,
+		tuple=tuple,
+		_intstr=int.__repr__,
+	):
+	"""Build the recursive `_iterencode(o, _current_indent_level)` generator that yields JSON text chunks; numpy.number floats are left to `_default`."""
+	try:
+		import numpy
+		def isfloatinstance(obj):
+			return isinstance(obj, float) and not isinstance(obj, numpy.number)
+	except ImportError:
+		def isfloatinstance(obj):
+			return isinstance(obj, float)
+
+	if _indent is not None and not isinstance(_indent, str):
+		_indent = ' ' * _indent
+
+	def _iterencode_list(lst, _current_indent_level):
+		if not lst:
+			yield '[]'
+			return
+		if markers is not None:
+			markerid = id(lst)
+			if markerid in markers:
+				raise ValueError("Circular reference detected")
+			markers[markerid] = lst
+		buf = '['
+		if _indent is not None:
+			_current_indent_level += 1
+			newline_indent = '\n' + _indent * _current_indent_level
+			separator = _item_separator + newline_indent
+			buf += newline_indent
+		else:
+			newline_indent = None
+			separator = _item_separator
+		first = True
+		for value in lst:
+			if first:
+				first = False
+			else:
+				buf = separator
+			if isinstance(value, str):
+				yield buf + _encoder(value)
+			elif value is None:
+				yield buf + 'null'
+			elif value is True:
+				yield buf + 'true'
+			elif value is False:
+				yield buf + 'false'
+			elif isinstance(value, int):
+				# Subclasses of int/float may override __repr__, but we still
+				# want to encode them as integers/floats in JSON. One example
+				# within the standard library is IntEnum.
+				yield buf + _intstr(value)
+			elif isfloatinstance(value):
+				# see comment above for int
+				yield buf + _floatstr(value)
+			else:
+				yield buf
+				if isinstance(value, (list, tuple)):
+					chunks = _iterencode_list(value, _current_indent_level)
+				elif isinstance(value, dict):
+					chunks = _iterencode_dict(value, _current_indent_level)
+				else:
+					chunks = _iterencode(value, _current_indent_level)
+				yield from chunks
+		if newline_indent is not None:
+			_current_indent_level -= 1
+			yield '\n' + _indent * _current_indent_level
+		yield ']'
+		if markers is not None:
+			del markers[markerid]
+
+	def _iterencode_dict(dct, _current_indent_level):
+		if not dct:
+			yield '{}'
+			return
+		if markers is not None:
+			markerid = id(dct)
+			if markerid in markers:
+				raise ValueError("Circular reference detected")
+			markers[markerid] = dct
+		yield '{'
+		if _indent is not None:
+			_current_indent_level += 1
+			newline_indent = '\n' + _indent * _current_indent_level
+			item_separator = _item_separator + newline_indent
+			yield newline_indent
+		else:
+			newline_indent = None
+			item_separator = _item_separator
+		first = True
+		if _sort_keys:
+			items = sorted(dct.items())
+		else:
+			items = dct.items()
+		for key, value in items:
+			if isinstance(key, str):
+				pass
+			# JavaScript is weakly typed for these, so it makes sense to
+			# also allow them.  Many encoders seem to do something like this.
+			elif isinstance(key, float):
+				# see the int/float subclass comment in _iterencode_list
+				key = _floatstr(key)
+			elif key is True:
+				key = 'true'
+			elif key is False:
+				key = 'false'
+			elif key is None:
+				key = 'null'
+			elif isinstance(key, int):
+				# see the int/float subclass comment in _iterencode_list
+				key = _intstr(key)
+			elif _skipkeys:
+				continue
+			else:
+				raise TypeError(f'keys must be str, int, float, bool or None, '
+								f'not {key.__class__.__name__}')
+			if first:
+				first = False
+			else:
+				yield item_separator
+			yield _encoder(key)
+			yield _key_separator
+			if isinstance(value, str):
+				yield _encoder(value)
+			elif value is None:
+				yield 'null'
+			elif value is True:
+				yield 'true'
+			elif value is False:
+				yield 'false'
+			elif isinstance(value, int):
+				# see the int/float subclass comment in _iterencode_list
+				yield _intstr(value)
+			elif isfloatinstance(value):
+				# see the int/float subclass comment in _iterencode_list
+				yield _floatstr(value)
+			else:
+				if isinstance(value, (list, tuple)):
+					chunks = _iterencode_list(value, _current_indent_level)
+				elif isinstance(value, dict):
+					chunks = _iterencode_dict(value, _current_indent_level)
+				else:
+					chunks = _iterencode(value, _current_indent_level)
+				yield from chunks
+		if newline_indent is not None:
+			_current_indent_level -= 1
+			yield '\n' + _indent * _current_indent_level
+		yield '}'
+		if markers is not None:
+			del markers[markerid]
+
+	def _iterencode(o, _current_indent_level):
+		if isinstance(o, str):
+			yield _encoder(o)
+		elif o is None:
+			yield 'null'
+		elif o is True:
+			yield 'true'
+		elif o is False:
+			yield 'false'
+		elif isinstance(o, int):
+			# see the int/float subclass comment in _iterencode_list
+			yield _intstr(o)
+		elif isfloatinstance(o):
+			# see the int/float subclass comment in _iterencode_list
+			yield _floatstr(o)
+		elif isinstance(o, (list, tuple)):
+			yield from _iterencode_list(o, _current_indent_level)
+		elif isinstance(o, dict):
+			yield from _iterencode_dict(o, _current_indent_level)
+		else:
+			if markers is not None:
+				markerid = id(o)
+				if markerid in markers:
+					raise ValueError("Circular reference detected")
+				markers[markerid] = o
+			o = _default(o)
+			yield from _iterencode(o, _current_indent_level)
+			if markers is not None:
+				del markers[markerid]
+	return _iterencode
diff --git a/json_tricks/utils.py b/json_tricks/utils.py
@@ -120,6 +120,7 @@ def get_scalar_repr(npscalar):
 		('__ndarray__', npscalar.item()),
 		('dtype', str(npscalar.dtype)),
 		('shape', ()),
+		('0dim', False),
 	))