diff --git a/doc/articles/array_to_tuple_array.py b/doc/articles/array_to_tuple_array.py index c9a21074..8ab034df 100644 --- a/doc/articles/array_to_tuple_array.py +++ b/doc/articles/array_to_tuple_array.py @@ -12,6 +12,7 @@ sys.path.append(os.getcwd()) + class ArrayProcessor: NAME = '' SORT = -1 @@ -19,7 +20,8 @@ class ArrayProcessor: def __init__(self, array: np.ndarray): self.array = array -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- class AKArray2D1D(ArrayProcessor): NAME = 'ak.array_to_tuple_array()' SORT = 0 @@ -27,6 +29,7 @@ class AKArray2D1D(ArrayProcessor): def __call__(self): _ = array_to_tuple_array(self.array) + class PyArray2D1D(ArrayProcessor): NAME = 'Python construction' SORT = 1 @@ -41,9 +44,11 @@ def __call__(self): post[i] = tuple(row) post.flags.writeable = False -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- NUMBER = 200 + def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: @@ -67,9 +72,12 @@ def plot_performance(frame): # category is the size of the array for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): # each fixture is a collection of tests for one display - fixtures = {fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture')} + fixtures = { + fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture') + } for fixture_count, (fixture_label, fixture) in enumerate( - (k, fixtures[k]) for k in FixtureFactory.DENSITY_TO_DISPLAY): + (k, fixtures[k]) for k in FixtureFactory.DENSITY_TO_DISPLAY + ): ax = axes[cat_count][fixture_count] # set order @@ -87,37 +95,46 @@ def plot_performance(frame): title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}' ax.set_title(title, fontsize=6) - ax.set_box_aspect(0.75) # makes taller than wide + ax.set_box_aspect(0.75) # makes taller than wide time_max = fixture['time'].max() ax.set_yticks([0, time_max * 0.5, time_max]) - ax.set_yticklabels(['', - seconds_to_display(time_max * .5), + ax.set_yticklabels( + [ + '', + seconds_to_display(time_max * 0.5), seconds_to_display(time_max), - ], fontsize=4) + ], + fontsize=4, + ) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis='x', - which='both', - bottom=False, - top=False, - labelbottom=False, - ) - - fig.set_size_inches(8, 4) # width, height + axis='x', + which='both', + bottom=False, + top=False, + labelbottom=False, + ) + + fig.set_size_inches(8, 4) # width, height fig.legend(post, names_display, loc='center right', fontsize=6) # horizontal, vertical - fig.text(.05, .96, f'array_to_tuple_array() Performance: {NUMBER} Iterations', fontsize=10) - fig.text(.05, .90, get_versions(), fontsize=6) + fig.text( + 0.05, + 0.96, + f'array_to_tuple_array() Performance: {NUMBER} Iterations', + fontsize=10, + ) + fig.text(0.05, 0.90, get_versions(), fontsize=6) fp = '/tmp/array_to_tuple_array.png' plt.subplots_adjust( - left=0.05, - bottom=0.05, - right=0.8, - top=0.85, - wspace=1.0, # width - hspace=0.5, - ) + left=0.05, + bottom=0.05, + right=0.8, + top=0.85, + wspace=1.0, # width + hspace=0.5, + ) # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) @@ -127,7 +144,8 @@ def plot_performance(frame): os.system(f'open {fp}') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + class FixtureFactory: NAME = '' @@ -136,7 +154,7 @@ class FixtureFactory: def get_array(size: int, width_ratio: int) -> np.ndarray: if width_ratio > 1: return np.arange(size).reshape(size // width_ratio, width_ratio) - return np.arange(size) # return 1D array + return np.arange(size) # return 1D array @classmethod def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]: @@ -174,6 +192,7 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 2) return a + class FFC5(FixtureFactory): NAME = 'column-5' @@ -182,6 +201,7 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 5) return a + class FFC10(FixtureFactory): NAME = 'column-10' @@ -190,6 +210,7 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 10) return a + class FFC20(FixtureFactory): NAME = 'column-20' @@ -198,15 +219,17 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 20) return a + def get_versions() -> str: import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' CLS_PROCESSOR = ( AKArray2D1D, PyArray2D1D, - ) +) CLS_FF = ( FFC1, @@ -228,10 +251,7 @@ def run_test(): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit( - f'runner()', - globals=locals(), - number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -239,15 +259,12 @@ def run_test(): record.append(result) records.append(record) - f = pd.DataFrame.from_records(records, - columns=('cls_processor', 'number', 'fixture', 'size', 'time') - ) + f = pd.DataFrame.from_records( + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) print(f) plot_performance(f) -if __name__ == '__main__': +if __name__ == '__main__': run_test() - - - diff --git a/doc/articles/array_to_tuple_iter.py b/doc/articles/array_to_tuple_iter.py index a05d5513..16111b77 100644 --- a/doc/articles/array_to_tuple_iter.py +++ b/doc/articles/array_to_tuple_iter.py @@ -12,6 +12,7 @@ sys.path.append(os.getcwd()) + class ArrayProcessor: NAME = '' SORT = -1 @@ -19,7 +20,8 @@ class ArrayProcessor: def __init__(self, array: np.ndarray): self.array = array -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- class AKArray2DTupleList(ArrayProcessor): NAME = 'list(ak.array_to_tuple_iter(a2d))' SORT = 0 @@ -27,6 +29,7 @@ class AKArray2DTupleList(ArrayProcessor): def __call__(self): _ = list(array_to_tuple_iter(self.array)) + class AKArray2DTupleNext(ArrayProcessor): NAME = 'next(ak.array_to_tuple_iter(a2d))' SORT = 1 @@ -39,6 +42,7 @@ def __call__(self): except StopIteration: break + class PyArray2DTupleMapList(ArrayProcessor): NAME = 'list(map(tuple, a2d))' SORT = 2 @@ -50,6 +54,7 @@ def __call__(self): else: _ = list(map(lambda e: (e,), array)) + class PyArray2DTupleIterNext(ArrayProcessor): NAME = 'tuple(next(iter(a2d)))' SORT = 3 @@ -71,13 +76,10 @@ def __call__(self): break - - - - -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- NUMBER = 200 + def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: @@ -101,9 +103,12 @@ def plot_performance(frame): # category is the size of the array for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): # each fixture is a collection of tests for one display - fixtures = {fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture')} + fixtures = { + fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture') + } for fixture_count, (fixture_label, fixture) in enumerate( - (k, fixtures[k]) for k in FixtureFactory.DENSITY_TO_DISPLAY): + (k, fixtures[k]) for k in FixtureFactory.DENSITY_TO_DISPLAY + ): ax = axes[cat_count][fixture_count] # set order @@ -121,37 +126,46 @@ def plot_performance(frame): title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}' ax.set_title(title, fontsize=6) - ax.set_box_aspect(0.75) # makes taller than wide + ax.set_box_aspect(0.75) # makes taller than wide time_max = fixture['time'].max() ax.set_yticks([0, time_max * 0.5, time_max]) - ax.set_yticklabels(['', - seconds_to_display(time_max * .5), + ax.set_yticklabels( + [ + '', + seconds_to_display(time_max * 0.5), seconds_to_display(time_max), - ], fontsize=4) + ], + fontsize=4, + ) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis='x', - which='both', - bottom=False, - top=False, - labelbottom=False, - ) - - fig.set_size_inches(8, 4) # width, height + axis='x', + which='both', + bottom=False, + top=False, + labelbottom=False, + ) + + fig.set_size_inches(8, 4) # width, height fig.legend(post, names_display, loc='center right', fontsize=6) # horizontal, vertical - fig.text(.05, .96, f'array_to_tuple_iter() Performance: {NUMBER} Iterations', fontsize=10) - fig.text(.05, .90, get_versions(), fontsize=6) + fig.text( + 0.05, + 0.96, + f'array_to_tuple_iter() Performance: {NUMBER} Iterations', + fontsize=10, + ) + fig.text(0.05, 0.90, get_versions(), fontsize=6) fp = '/tmp/array_to_tuple_iter.png' plt.subplots_adjust( - left=0.05, - bottom=0.05, - right=0.8, - top=0.85, - wspace=0.1, # width - hspace=0.5, - ) + left=0.05, + bottom=0.05, + right=0.8, + top=0.85, + wspace=0.1, # width + hspace=0.5, + ) # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) @@ -161,7 +175,8 @@ def plot_performance(frame): os.system(f'open {fp}') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + class FixtureFactory: NAME = '' @@ -170,7 +185,7 @@ class FixtureFactory: def get_array(size: int, width_ratio: int) -> np.ndarray: if width_ratio > 1: return np.arange(size).reshape(size // width_ratio, width_ratio) - return np.arange(size) # return 1D array + return np.arange(size) # return 1D array @classmethod def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]: @@ -203,6 +218,7 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 2) return a + class FFC5(FixtureFactory): NAME = 'column-5' @@ -211,6 +227,7 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 5) return a + class FFC10(FixtureFactory): NAME = 'column-10' @@ -219,6 +236,7 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 10) return a + class FFC20(FixtureFactory): NAME = 'column-20' @@ -227,8 +245,10 @@ def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size, 20) return a + def get_versions() -> str: import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' @@ -237,7 +257,7 @@ def get_versions() -> str: AKArray2DTupleNext, PyArray2DTupleMapList, PyArray2DTupleIterNext, - ) +) CLS_FF = ( @@ -260,10 +280,7 @@ def run_test(): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit( - f'runner()', - globals=locals(), - number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -271,15 +288,12 @@ def run_test(): record.append(result) records.append(record) - f = pd.DataFrame.from_records(records, - columns=('cls_processor', 'number', 'fixture', 'size', 'time') - ) + f = pd.DataFrame.from_records( + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) print(f) plot_performance(f) -if __name__ == '__main__': +if __name__ == '__main__': run_test() - - - diff --git a/doc/articles/block_index.py b/doc/articles/block_index.py index 996d773b..03bc9131 100644 --- a/doc/articles/block_index.py +++ b/doc/articles/block_index.py @@ -1,6 +1,3 @@ - - - import os import sys import timeit @@ -23,8 +20,8 @@ from performance.reference.block_index import indices_to_contiguous_pairs +# ------------------------------------------------------------------------------- -#------------------------------------------------------------------------------- class ArrayProcessor: NAME = '' @@ -48,7 +45,7 @@ def __init__(self, arrays: tp.Iterable[np.ndarray]): self.selector_slice = slice(0, len(self.bi), 2) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class BlockIndexLoad(ArrayProcessor): NAME = 'BlockIndex: load' SORT = 0 @@ -59,6 +56,7 @@ def __call__(self): bi.register(a) assert bi.shape[0] == ROW_COUNT + class TupleIndexLoad(ArrayProcessor): NAME = 'TupleIndex: load' SORT = 10 @@ -76,6 +74,7 @@ def __call__(self): for _ in range(10): _ = self.bi.copy() + class TupleIndexCopy(ArrayProcessor): NAME = 'TupleIndex: copy' SORT = 12 @@ -93,6 +92,7 @@ def __call__(self): msg = pickle.dumps(self.bi) bi2 = pickle.loads(msg) + class TupleIndexPickle(ArrayProcessor): NAME = 'TupleIndex: pickle' SORT = 14 @@ -111,6 +111,7 @@ def __call__(self): for i in range(len(bi)): _ = bi[i] + class BlockIndexLookupBlock(ArrayProcessor): NAME = 'BlockIndex: lookup block' SORT = 1.1 @@ -120,6 +121,7 @@ def __call__(self): for i in range(len(bi)): _ = bi.get_block(i) + class TupleIndexLookup(ArrayProcessor): NAME = 'TupleIndex: lookup' SORT = 11 @@ -137,6 +139,7 @@ class BlockIndexIterIntArray(ArrayProcessor): def __call__(self): _ = list(self.bi.iter_contiguous(self.selector_int_array)) + class TupleIndexIterIntArray(ArrayProcessor): NAME = 'TupleIndex: contig by int array' SORT = 15 @@ -153,6 +156,7 @@ class BlockIndexIterIntList(ArrayProcessor): def __call__(self): _ = list(self.bi.iter_contiguous(self.selector_int_list)) + class TupleIndexIterIntList(ArrayProcessor): NAME = 'TupleIndex: contig by int list' SORT = 16 @@ -169,6 +173,7 @@ class BlockIndexIterSlice(ArrayProcessor): def __call__(self): _ = list(self.bi.iter_contiguous(self.selector_slice)) + class TupleIndexIterSlice(ArrayProcessor): NAME = 'TupleIndex: contig by slice' SORT = 17 @@ -178,8 +183,6 @@ def __call__(self): _ = list(indices_to_contiguous_pairs(ti[self.selector_slice])) - - class BlockIndexIterBoolArray(ArrayProcessor): NAME = 'BlockIndex: contig by bool array' SORT = 8 @@ -187,18 +190,24 @@ class BlockIndexIterBoolArray(ArrayProcessor): def __call__(self): _ = list(self.bi.iter_contiguous(self.selector_bool_array)) + class TupleIndexIterBoolArray(ArrayProcessor): NAME = 'TupleIndex: contig by bool array' SORT = 18 def __call__(self): ti = self.ti - _ = list(indices_to_contiguous_pairs(ti[i] for i, b in enumerate(self.selector_bool_array) if b)) + _ = list( + indices_to_contiguous_pairs( + ti[i] for i, b in enumerate(self.selector_bool_array) if b + ) + ) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- NUMBER = 50 + def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: @@ -221,8 +230,7 @@ def plot_performance(frame): # category is the size of the array for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): - for fixture_count, (fixture_label, fixture) in enumerate( - cat.groupby('fixture')): + for fixture_count, (fixture_label, fixture) in enumerate(cat.groupby('fixture')): ax = axes[cat_count][fixture_count] # set order @@ -230,22 +238,23 @@ def plot_performance(frame): fixture = fixture.sort_values('sort') results = fixture['time'].values.tolist() - x_labels = [f'{i}: {cls.NAME}' for i, cls in - zip(range(1, len(results) + 1), fixture['cls_processor']) - ] + x_labels = [ + f'{i}: {cls.NAME}' + for i, cls in zip(range(1, len(results) + 1), fixture['cls_processor']) + ] x_tick_labels = [str(l + 1) for l in range(len(x_labels))] x = np.arange(len(results)) x_bar = ax.bar(x_labels, results, color=color) title = f'{cat_label:.0e}\n{fixture_label}' ax.set_title(title, fontsize=6) - ax.set_box_aspect(0.5) # larger makes taller tan wide + ax.set_box_aspect(0.5) # larger makes taller tan wide - time_max = fixture["time"].max() - time_min = fixture["time"].min() + time_max = fixture['time'].max() + time_min = fixture['time'].min() y_ticks = [0, time_min, time_max * 0.5, time_max] y_labels = [ - "", + '', seconds_to_display(time_min), seconds_to_display(time_max * 0.5), seconds_to_display(time_max), @@ -258,7 +267,7 @@ def plot_performance(frame): ax.set_yticks(y_ticks) ax.set_yticklabels(y_labels, fontsize=4) ax.tick_params( - axis="y", + axis='y', length=2, width=0.5, pad=1, @@ -266,28 +275,28 @@ def plot_performance(frame): ax.set_xticks(x) ax.set_xticklabels(x_tick_labels, fontsize=4) ax.tick_params( - axis="x", + axis='x', length=2, width=0.5, pad=1, ) # ax.set_yscale('log') - fig.set_size_inches(9, 3.5) # width, height + fig.set_size_inches(9, 3.5) # width, height fig.legend(x_bar, x_labels, loc='center right', fontsize=6) # horizontal, vertical - fig.text(.05, .96, f'BlockIndex Performance: {NUMBER} Iterations', fontsize=10) - fig.text(.05, .90, get_versions(), fontsize=6) + fig.text(0.05, 0.96, f'BlockIndex Performance: {NUMBER} Iterations', fontsize=10) + fig.text(0.05, 0.90, get_versions(), fontsize=6) fp = '/tmp/block_index.png' plt.subplots_adjust( - left=0.075, - bottom=0.05, - right=0.80, - top=0.80, - wspace=0.6, # width - hspace=0.6, - ) + left=0.075, + bottom=0.05, + right=0.80, + top=0.80, + wspace=0.6, # width + hspace=0.6, + ) # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) @@ -297,10 +306,11 @@ def plot_performance(frame): os.system(f'open {fp}') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- ROW_COUNT = 2 + class FixtureFactory: NAME = '' @@ -325,7 +335,10 @@ def get_arrays(size: int) -> tp.Iterator[np.ndarray]: yield a size -= 1 + from itertools import cycle + + class FFMixed(FixtureFactory): NAME = 'mixed' @@ -339,6 +352,7 @@ def get_arrays(size: int) -> tp.Iterator[np.ndarray]: yield a size -= w + class FFUniform(FixtureFactory): NAME = 'uniform' @@ -351,6 +365,7 @@ def get_arrays(size: int) -> tp.Iterator[np.ndarray]: def get_versions() -> str: import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' @@ -372,7 +387,7 @@ def get_versions() -> str: TupleIndexIterBoolArray, BlockIndexIterSlice, TupleIndexIterSlice, - ) +) CLS_FF = ( FFColumnar, @@ -392,10 +407,7 @@ def run_test(): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit( - f'runner()', - globals=locals(), - number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -403,15 +415,12 @@ def run_test(): record.append(result) records.append(record) - f = pd.DataFrame.from_records(records, - columns=('cls_processor', 'number', 'fixture', 'size', 'time') - ) + f = pd.DataFrame.from_records( + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) print(f) plot_performance(f) -if __name__ == '__main__': +if __name__ == '__main__': run_test() - - - diff --git a/doc/articles/first_true_1d.py b/doc/articles/first_true_1d.py index 4583a683..b759bc80 100644 --- a/doc/articles/first_true_1d.py +++ b/doc/articles/first_true_1d.py @@ -1,6 +1,3 @@ - - - import os import sys import timeit @@ -17,7 +14,6 @@ sys.path.append(os.getcwd()) - class ArrayProcessor: NAME = '' SORT = -1 @@ -25,7 +21,8 @@ class ArrayProcessor: def __init__(self, array: np.ndarray): self.array = array -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- class AKFirstTrue(ArrayProcessor): NAME = 'ak.first_true_1d()' SORT = 0 @@ -33,6 +30,7 @@ class AKFirstTrue(ArrayProcessor): def __call__(self): _ = first_true_1d(self.array, forward=True) + class PYLoop(ArrayProcessor): NAME = 'Python Loop' SORT = 0 @@ -50,6 +48,7 @@ class NPNonZero(ArrayProcessor): def __call__(self): _ = np.nonzero(self.array)[0][0] + class NPArgMax(ArrayProcessor): NAME = 'np.argmax()' SORT = 1 @@ -57,6 +56,7 @@ class NPArgMax(ArrayProcessor): def __call__(self): _ = np.argmax(self.array) + class NPNotAnyArgMax(ArrayProcessor): NAME = 'np.any(), np.argmax()' SORT = 2 @@ -65,9 +65,11 @@ def __call__(self): _ = not np.any(self.array) _ = np.argmax(self.array) -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- NUMBER = 200 + def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: @@ -90,8 +92,7 @@ def plot_performance(frame): # category is the size of the array for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): - for fixture_count, (fixture_label, fixture) in enumerate( - cat.groupby('fixture')): + for fixture_count, (fixture_label, fixture) in enumerate(cat.groupby('fixture')): ax = axes[cat_count][fixture_count] # set order @@ -109,37 +110,41 @@ def plot_performance(frame): title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[density]}\n{FixtureFactory.POSITION_TO_DISPLAY[position]}' ax.set_title(title, fontsize=6) - ax.set_box_aspect(0.75) # makes taller tan wide + ax.set_box_aspect(0.75) # makes taller tan wide time_max = fixture['time'].max() ax.set_yticks([0, time_max * 0.5, time_max]) - ax.set_yticklabels(['', - seconds_to_display(time_max * .5), + ax.set_yticklabels( + [ + '', + seconds_to_display(time_max * 0.5), seconds_to_display(time_max), - ], fontsize=6) + ], + fontsize=6, + ) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis='x', - which='both', - bottom=False, - top=False, - labelbottom=False, - ) - - fig.set_size_inches(9, 3.5) # width, height + axis='x', + which='both', + bottom=False, + top=False, + labelbottom=False, + ) + + fig.set_size_inches(9, 3.5) # width, height fig.legend(post, names_display, loc='center right', fontsize=8) # horizontal, vertical - fig.text(.05, .96, f'first_true_1d() Performance: {NUMBER} Iterations', fontsize=10) - fig.text(.05, .90, get_versions(), fontsize=6) + fig.text(0.05, 0.96, f'first_true_1d() Performance: {NUMBER} Iterations', fontsize=10) + fig.text(0.05, 0.90, get_versions(), fontsize=6) fp = '/tmp/first_true.png' plt.subplots_adjust( - left=0.075, - bottom=0.05, - right=0.80, - top=0.85, - wspace=1, # width - hspace=0.1, - ) + left=0.075, + bottom=0.05, + right=0.80, + top=0.85, + wspace=1, # width + hspace=0.1, + ) # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) @@ -149,7 +154,8 @@ def plot_performance(frame): os.system(f'open {fp}') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + class FixtureFactory: NAME = '' @@ -159,13 +165,13 @@ def get_array(size: int) -> np.ndarray: return np.full(size, False, dtype=bool) def _get_array_filled( - size: int, - start_third: int, # 1 or 2 - density: float, # less than 1 - ) -> np.ndarray: + size: int, + start_third: int, # 1 or 2 + density: float, # less than 1 + ) -> np.ndarray: a = FixtureFactory.get_array(size) count = size * density - start = int(len(a) * (start_third/3)) + start = int(len(a) * (start_third / 3)) length = len(a) - start step = int(length / count) fill = np.arange(start, len(a), step) @@ -195,16 +201,17 @@ class FFSingleFirstThird(FixtureFactory): @staticmethod def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size) - a[int(len(a) * (1/3))] = True + a[int(len(a) * (1 / 3))] = True return a + class FFSingleSecondThird(FixtureFactory): NAME = 'single-second_third' @staticmethod def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size) - a[int(len(a) * (2/3))] = True + a[int(len(a) * (2 / 3))] = True return a @@ -213,7 +220,7 @@ class FFTenthPostFirstThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=1, density=.1) + return cls._get_array_filled(size, start_third=1, density=0.1) class FFTenthPostSecondThird(FixtureFactory): @@ -221,7 +228,7 @@ class FFTenthPostSecondThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=2, density=.1) + return cls._get_array_filled(size, start_third=2, density=0.1) class FFThirdPostFirstThird(FixtureFactory): @@ -229,7 +236,7 @@ class FFThirdPostFirstThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=1, density=1/3) + return cls._get_array_filled(size, start_third=1, density=1 / 3) class FFThirdPostSecondThird(FixtureFactory): @@ -237,11 +244,12 @@ class FFThirdPostSecondThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=2, density=1/3) + return cls._get_array_filled(size, start_third=2, density=1 / 3) def get_versions() -> str: import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' @@ -251,7 +259,7 @@ def get_versions() -> str: NPArgMax, NPNotAnyArgMax, # PYLoop, - ) +) CLS_FF = ( FFSingleFirstThird, @@ -274,10 +282,7 @@ def run_test(): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit( - f'runner()', - globals=locals(), - number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -285,15 +290,12 @@ def run_test(): record.append(result) records.append(record) - f = pd.DataFrame.from_records(records, - columns=('cls_processor', 'number', 'fixture', 'size', 'time') - ) + f = pd.DataFrame.from_records( + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) print(f) plot_performance(f) -if __name__ == '__main__': +if __name__ == '__main__': run_test() - - - diff --git a/doc/articles/first_true_2d.py b/doc/articles/first_true_2d.py index 8d81af35..a4e02d20 100644 --- a/doc/articles/first_true_2d.py +++ b/doc/articles/first_true_2d.py @@ -1,6 +1,3 @@ - - - import os import sys import timeit @@ -17,7 +14,6 @@ sys.path.append(os.getcwd()) - class ArrayProcessor: NAME = '' SORT = -1 @@ -25,7 +21,8 @@ class ArrayProcessor: def __init__(self, array: np.ndarray): self.array = array -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- class AKFirstTrueAxis0Forward(ArrayProcessor): NAME = 'ak.first_true_2d(forward=True, axis=0)' SORT = 0 @@ -33,6 +30,7 @@ class AKFirstTrueAxis0Forward(ArrayProcessor): def __call__(self): _ = first_true_2d(self.array, forward=True, axis=0) + class AKFirstTrueAxis1Forward(ArrayProcessor): NAME = 'ak.first_true_2d(forward=True, axis=1)' SORT = 0 @@ -40,6 +38,7 @@ class AKFirstTrueAxis1Forward(ArrayProcessor): def __call__(self): _ = first_true_2d(self.array, forward=True, axis=1) + class AKFirstTrueAxis0Reverse(ArrayProcessor): NAME = 'ak.first_true_2d(forward=False, axis=0)' SORT = 1 @@ -47,6 +46,7 @@ class AKFirstTrueAxis0Reverse(ArrayProcessor): def __call__(self): _ = first_true_2d(self.array, forward=False, axis=0) + class AKFirstTrueAxis1Reverse(ArrayProcessor): NAME = 'ak.first_true_2d(forward=False, axis=1)' SORT = 1 @@ -72,6 +72,7 @@ def __call__(self): _ = ~np.any(self.array, axis=0) _ = np.argmax(self.array, axis=0) + class NPArgMaxAxis1(ArrayProcessor): NAME = 'np.any(axis=1), np.argmax(axis=1)' SORT = 4 @@ -81,10 +82,10 @@ def __call__(self): _ = np.argmax(self.array, axis=1) - -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- NUMBER = 100 + def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: @@ -107,8 +108,7 @@ def plot_performance(frame): # category is the size of the array for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): - for fixture_count, (fixture_label, fixture) in enumerate( - cat.groupby('fixture')): + for fixture_count, (fixture_label, fixture) in enumerate(cat.groupby('fixture')): ax = axes[cat_count][fixture_count] # set order @@ -126,37 +126,41 @@ def plot_performance(frame): title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[density]}\n{FixtureFactory.POSITION_TO_DISPLAY[position]}' ax.set_title(title, fontsize=6) - ax.set_box_aspect(0.75) # makes taller tan wide + ax.set_box_aspect(0.75) # makes taller tan wide time_max = fixture['time'].max() ax.set_yticks([0, time_max * 0.5, time_max]) - ax.set_yticklabels(['', - seconds_to_display(time_max * .5), + ax.set_yticklabels( + [ + '', + seconds_to_display(time_max * 0.5), seconds_to_display(time_max), - ], fontsize=6) + ], + fontsize=6, + ) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis='x', - which='both', - bottom=False, - top=False, - labelbottom=False, - ) - - fig.set_size_inches(9, 3.5) # width, height + axis='x', + which='both', + bottom=False, + top=False, + labelbottom=False, + ) + + fig.set_size_inches(9, 3.5) # width, height fig.legend(post, names_display, loc='center right', fontsize=6) # horizontal, vertical - fig.text(.05, .96, f'first_true_2d() Performance: {NUMBER} Iterations', fontsize=10) - fig.text(.05, .90, get_versions(), fontsize=6) + fig.text(0.05, 0.96, f'first_true_2d() Performance: {NUMBER} Iterations', fontsize=10) + fig.text(0.05, 0.90, get_versions(), fontsize=6) fp = '/tmp/first_true.png' plt.subplots_adjust( - left=0.075, - bottom=0.05, - right=0.75, - top=0.85, - wspace=1, # width - hspace=0.1, - ) + left=0.075, + bottom=0.05, + right=0.75, + top=0.85, + wspace=1, # width + hspace=0.1, + ) # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) @@ -166,7 +170,8 @@ def plot_performance(frame): os.system(f'open {fp}') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + class FixtureFactory: NAME = '' @@ -176,13 +181,13 @@ def get_array(size: int) -> np.ndarray: return np.full(size, False, dtype=bool) def _get_array_filled( - size: int, - start_third: int, # 1 or 2 - density: float, # less than 1 - ) -> np.ndarray: + size: int, + start_third: int, # 1 or 2 + density: float, # less than 1 + ) -> np.ndarray: a = FixtureFactory.get_array(size) count = size * density - start = int(len(a) * (start_third/3)) + start = int(len(a) * (start_third / 3)) length = len(a) - start step = int(length / count) fill = np.arange(start, len(a), step) @@ -212,16 +217,17 @@ class FFSingleFirstThird(FixtureFactory): @staticmethod def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size) - a[int(len(a) * (1/3))] = True + a[int(len(a) * (1 / 3))] = True return a + class FFSingleSecondThird(FixtureFactory): NAME = 'single-second_third' @staticmethod def get_array(size: int) -> np.ndarray: a = FixtureFactory.get_array(size) - a[int(len(a) * (2/3))] = True + a[int(len(a) * (2 / 3))] = True return a @@ -230,7 +236,7 @@ class FFTenthPostFirstThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=1, density=.1) + return cls._get_array_filled(size, start_third=1, density=0.1) class FFTenthPostSecondThird(FixtureFactory): @@ -238,7 +244,7 @@ class FFTenthPostSecondThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=2, density=.1) + return cls._get_array_filled(size, start_third=2, density=0.1) class FFThirdPostFirstThird(FixtureFactory): @@ -246,7 +252,7 @@ class FFThirdPostFirstThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=1, density=1/3) + return cls._get_array_filled(size, start_third=1, density=1 / 3) class FFThirdPostSecondThird(FixtureFactory): @@ -254,11 +260,12 @@ class FFThirdPostSecondThird(FixtureFactory): @classmethod def get_array(cls, size: int) -> np.ndarray: - return cls._get_array_filled(size, start_third=2, density=1/3) + return cls._get_array_filled(size, start_third=2, density=1 / 3) def get_versions() -> str: import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' @@ -269,8 +276,8 @@ def get_versions() -> str: AKFirstTrueAxis1Reverse, NPNonZero, NPArgMaxAxis0, - NPArgMaxAxis1 - ) + NPArgMaxAxis1, +) CLS_FF = ( FFSingleFirstThird, @@ -296,10 +303,7 @@ def run_test(): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit( - f'runner()', - globals=locals(), - number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -307,15 +311,12 @@ def run_test(): record.append(result) records.append(record) - f = pd.DataFrame.from_records(records, - columns=('cls_processor', 'number', 'fixture', 'size', 'time') - ) + f = pd.DataFrame.from_records( + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) print(f) plot_performance(f) -if __name__ == '__main__': +if __name__ == '__main__': run_test() - - - diff --git a/doc/articles/nonzero_1d.py b/doc/articles/nonzero_1d.py index a8ce068f..cfdc3954 100644 --- a/doc/articles/nonzero_1d.py +++ b/doc/articles/nonzero_1d.py @@ -1,6 +1,3 @@ - - - import os import sys import timeit @@ -16,7 +13,6 @@ sys.path.append(os.getcwd()) - class ArrayProcessor: NAME = '' SORT = -1 @@ -24,7 +20,8 @@ class ArrayProcessor: def __init__(self, array: np.ndarray): self.array = array -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- class AKNonZero(ArrayProcessor): NAME = 'ak.nonzero_1d()' SORT = 0 @@ -32,6 +29,7 @@ class AKNonZero(ArrayProcessor): def __call__(self): _ = nonzero_1d(self.array) + class NPNonZero(ArrayProcessor): NAME = 'np.nonzero()' SORT = 1 @@ -39,6 +37,7 @@ class NPNonZero(ArrayProcessor): def __call__(self): _ = np.nonzero(self.array)[0] + class NPNonZeroInt64Convert(ArrayProcessor): NAME = 'np.nonzero()\n.astype(np.int64)' SORT = 3 @@ -47,9 +46,10 @@ def __call__(self): _ = np.nonzero(self.array)[0].astype(np.int64) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- NUMBER = 200 + def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: @@ -73,7 +73,9 @@ def plot_performance(frame): # category is the size of the array for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): # each fixture is a collection of tests for one display - fixtures = {fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture')} + fixtures = { + fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture') + } for fixture_count, (fixture_label, fixture) in enumerate(fixtures.items()): ax = axes[cat_count][fixture_count] @@ -92,37 +94,41 @@ def plot_performance(frame): title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}' ax.set_title(title, fontsize=6) - ax.set_box_aspect(0.75) # makes taller than wide + ax.set_box_aspect(0.75) # makes taller than wide time_max = fixture['time'].max() ax.set_yticks([0, time_max * 0.5, time_max]) - ax.set_yticklabels(['', - seconds_to_display(time_max * .5), + ax.set_yticklabels( + [ + '', + seconds_to_display(time_max * 0.5), seconds_to_display(time_max), - ], fontsize=4) + ], + fontsize=4, + ) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis='x', - which='both', - bottom=False, - top=False, - labelbottom=False, - ) - - fig.set_size_inches(10, 4) # width, height + axis='x', + which='both', + bottom=False, + top=False, + labelbottom=False, + ) + + fig.set_size_inches(10, 4) # width, height fig.legend(post, names_display, loc='center right', fontsize=6) # horizontal, vertical - fig.text(.05, .96, f'nonzero_1d() Performance: {NUMBER} Iterations', fontsize=10) - fig.text(.05, .90, get_versions(), fontsize=6) + fig.text(0.05, 0.96, f'nonzero_1d() Performance: {NUMBER} Iterations', fontsize=10) + fig.text(0.05, 0.90, get_versions(), fontsize=6) fp = '/tmp/nonzero.png' plt.subplots_adjust( - left=0.05, - bottom=0.05, - right=0.85, - top=0.85, - wspace=0.9, # width - hspace=0.0, - ) + left=0.05, + bottom=0.05, + right=0.85, + top=0.85, + wspace=0.9, # width + hspace=0.0, + ) # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) @@ -132,7 +138,8 @@ def plot_performance(frame): os.system(f'open {fp}') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + class FixtureFactory: NAME = '' @@ -145,14 +152,14 @@ def get_array(size: int, contiguous: bool = True) -> np.ndarray: return np.full(size * 2, False, dtype=bool)[::2] def _get_array_filled( - size: int, - start_third: int, #0, 1 or 2 - density: float, # less than 1 - contiguous: bool, - ) -> np.ndarray: + size: int, + start_third: int, # 0, 1 or 2 + density: float, # less than 1 + contiguous: bool, + ) -> np.ndarray: a = FixtureFactory.get_array(size, contiguous) count = size * density - start = int(len(a) * (start_third/3)) + start = int(len(a) * (start_third / 3)) length = len(a) - start step = max(int(length / count), 1) fill = np.arange(start, len(a), step) @@ -190,6 +197,7 @@ def get_array(size: int) -> np.ndarray: a[len(a) // 2] = True return a + class FFSingleNC(FixtureFactory): NAME = 'single-nc' @@ -199,6 +207,7 @@ def get_array(size: int) -> np.ndarray: a[len(a) // 2] = True return a + class FFQuarter(FixtureFactory): NAME = 'quarter-c' @@ -206,6 +215,7 @@ class FFQuarter(FixtureFactory): def get_array(cls, size: int) -> np.ndarray: return cls._get_array_filled(size, start_third=0, density=0.25, contiguous=True) + class FFQuarterNC(FixtureFactory): NAME = 'quarter-nc' @@ -221,6 +231,7 @@ class FFHalf(FixtureFactory): def get_array(cls, size: int) -> np.ndarray: return cls._get_array_filled(size, start_third=0, density=0.5, contiguous=True) + class FFHalfNC(FixtureFactory): NAME = 'half-nc' @@ -229,7 +240,6 @@ def get_array(cls, size: int) -> np.ndarray: return cls._get_array_filled(size, start_third=0, density=0.5, contiguous=False) - class FFFull(FixtureFactory): NAME = 'full-c' @@ -248,6 +258,7 @@ def get_array(cls, size: int) -> np.ndarray: def get_versions() -> str: import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' @@ -255,7 +266,7 @@ def get_versions() -> str: AKNonZero, NPNonZero, NPNonZeroInt64Convert, - ) +) CLS_FF = ( FFSingle, @@ -266,7 +277,6 @@ def get_versions() -> str: FFHalfNC, FFFull, FFFullNC, - ) @@ -281,10 +291,7 @@ def run_test(): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit( - f'runner()', - globals=locals(), - number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -292,15 +299,12 @@ def run_test(): record.append(result) records.append(record) - f = pd.DataFrame.from_records(records, - columns=('cls_processor', 'number', 'fixture', 'size', 'time') - ) + f = pd.DataFrame.from_records( + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) print(f) plot_performance(f) -if __name__ == '__main__': +if __name__ == '__main__': run_test() - - - diff --git a/noxfile.py b/noxfile.py index 193e6d8f..52ab347e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -2,77 +2,98 @@ import sys ARTIFACTS = ( - "*.egg-info", - ".hypothesis", - "build", - "dist", - "src/*.so", + '*.egg-info', + '.hypothesis', + 'build', + 'dist', + 'src/*.so', ) # Make `nox` default to running tests if you just do `nox` -nox.options.sessions = ["test"] +nox.options.sessions = ['test'] def do_clean(session: nox.Session) -> None: # uninstall arraykit session.run( - sys.executable, "-m", "pip", - "--disable-pip-version-check", "uninstall", "--yes", "arraykit", - external=True + sys.executable, + '-m', + 'pip', + '--disable-pip-version-check', + 'uninstall', + '--yes', + 'arraykit', + external=True, ) # remove artifacts for artifact in sorted(ARTIFACTS): - session.run("rm", "-rf", artifact, external=True) + session.run('rm', '-rf', artifact, external=True) + def do_build(session: nox.Session) -> None: # keep -v to see warnings; no build isolation to match your invoke cmd session.run( - sys.executable, "-m", "pip", - "--disable-pip-version-check", - "install", "-v", "--no-build-isolation", ".", - external=True + sys.executable, + '-m', + 'pip', + '--disable-pip-version-check', + 'install', + '-v', + '--no-build-isolation', + '.', + external=True, ) + def do_test(session: nox.Session) -> None: session.run( - "pytest", - "-s", - "--disable-pytest-warnings", - "--tb=native", + 'pytest', + '-s', + '--disable-pytest-warnings', + '--tb=native', external=True, ) + def do_performance(session: nox.Session) -> None: """Run performance benchmarks.""" - args = [sys.executable, "-m", "performance"] + args = [sys.executable, '-m', 'performance'] if session.posargs: - args.extend(["--names"] + session.posargs) + args.extend(['--names'] + session.posargs) session.run(*args, external=True) + def do_lint(session: nox.Session) -> None: session.run( - "pylint", - "-f", "colorized", - "*.py", "performance", "src", "test", + 'pylint', + '-f', + 'colorized', + '*.py', + 'performance', + 'src', + 'test', external=True, ) # NOTE: use `nox -s build` to launch a session + @nox.session(python=False) # use current environment def clean(session): """Clean build artifacts and uninstall arraykit.""" do_clean(session) + @nox.session(python=False) def build(session): """Clean then build/install locally (like invoke: build depends on clean).""" do_clean(session) do_build(session) + @nox.session(python=False) def test(session): """Build then run pytest (like invoke: test depends on build).""" @@ -80,6 +101,7 @@ def test(session): do_build(session) do_test(session) + @nox.session(python=False) def performance(session): """Build then run performance benches (like invoke: performance depends on build).""" @@ -87,6 +109,7 @@ def performance(session): do_build(session) do_performance(session) + @nox.session(python=False) def lint(session): """Run pylint static analysis.""" diff --git a/performance/__main__.py b/performance/__main__.py index 718ee57c..537037f9 100644 --- a/performance/__main__.py +++ b/performance/__main__.py @@ -27,7 +27,9 @@ from performance.reference.util import get_new_indexers_and_screen_ref from performance.reference.util import split_after_count as split_after_count_ref from performance.reference.util import count_iteration as count_iteration_ref -from performance.reference.util import slice_to_ascending_slice as slice_to_ascending_slice_ref +from performance.reference.util import ( + slice_to_ascending_slice as slice_to_ascending_slice_ref, +) from performance.reference.array_go import ArrayGO as ArrayGOREF @@ -55,30 +57,40 @@ class Perf: FUNCTIONS = ('main',) NUMBER = 10 -class FixtureFileLike: +class FixtureFileLike: COUNT_ROW = 1_000_000 COUNT_COLUMN = 10 def __init__(self): - records_int = [','.join(str(x) for x in range(self.COUNT_COLUMN))] * self.COUNT_ROW + records_int = [ + ','.join(str(x) for x in range(self.COUNT_COLUMN)) + ] * self.COUNT_ROW self.file_like_int = io.StringIO('\n'.join(records_int)) - records_bool = [','.join(str(bool(x % 2)) for x in range(self.COUNT_COLUMN))] * self.COUNT_ROW + records_bool = [ + ','.join(str(bool(x % 2)) for x in range(self.COUNT_COLUMN)) + ] * self.COUNT_ROW self.file_like_bool = io.StringIO('\n'.join(records_bool)) - records_str = [','.join('foobar' for x in range(self.COUNT_COLUMN))] * self.COUNT_ROW + records_str = [ + ','.join('foobar' for x in range(self.COUNT_COLUMN)) + ] * self.COUNT_ROW self.file_like_str = io.StringIO('\n'.join(records_str)) - records_float = [','.join('1.2345' for x in range(self.COUNT_COLUMN))] * self.COUNT_ROW + records_float = [ + ','.join('1.2345' for x in range(self.COUNT_COLUMN)) + ] * self.COUNT_ROW self.file_like_float = io.StringIO('\n'.join(records_float)) self.axis = 1 + # #------------------------------------------------------------------------------- class DelimitedToArraysTypedPandas(FixtureFileLike, Perf): FUNCTIONS = ('bool_uniform', 'int_uniform', 'str_uniform', 'float_uniform') + class DelimitedToArraysTypedPandasAK(DelimitedToArraysTypedPandas): entry = staticmethod(delimited_to_arrays_ak) dtypes_int = ([int] * FixtureFileLike.COUNT_COLUMN).__getitem__ @@ -105,6 +117,7 @@ def float_uniform(self): class DelimitedToArraysTypedPandasREF(DelimitedToArraysTypedPandas): import pandas + entry = staticmethod(pandas.read_csv) dtypes_int = {i: int for i in range(FixtureFileLike.COUNT_COLUMN)} dtypes_bool = {i: bool for i in range(FixtureFileLike.COUNT_COLUMN)} @@ -127,11 +140,14 @@ def float_uniform(self): self.file_like_float.seek(0) _ = self.entry(self.file_like_float, dtype=self.dtypes_float) + # #------------------------------------------------------------------------------- + class DelimitedToArraysParsedPandas(FixtureFileLike, Perf): FUNCTIONS = ('bool_uniform', 'int_uniform', 'str_uniform', 'float_uniform') + class DelimitedToArraysParsedPandasAK(DelimitedToArraysParsedPandas): entry = staticmethod(delimited_to_arrays_ak) @@ -154,6 +170,7 @@ def float_uniform(self): class DelimitedToArraysParsedPandasREF(DelimitedToArraysParsedPandas): import pandas + entry = staticmethod(pandas.read_csv) def int_uniform(self): @@ -177,6 +194,7 @@ def float_uniform(self): class DelimitedToArraysTypedGenft(FixtureFileLike, Perf): FUNCTIONS = ('bool_uniform', 'int_uniform', 'str_uniform', 'float_uniform') + class DelimitedToArraysTypedGenftAK(DelimitedToArraysTypedGenft): entry = staticmethod(delimited_to_arrays_ak) @@ -284,24 +302,25 @@ def float_uniform(self): # _ = self.entry(self.file_like_float, delimiter=',', dtype=None) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class MLoc(Perf): - def __init__(self): self.array = np.arange(100) def main(self): self.entry(self.array) + class MLocAK(MLoc): entry = staticmethod(mloc_ak) + class MLocREF(MLoc): entry = staticmethod(mloc_ref) -#------------------------------------------------------------------------------- -class ImmutableFilter(Perf): +# ------------------------------------------------------------------------------- +class ImmutableFilter(Perf): def __init__(self): self.array = np.arange(100) @@ -309,15 +328,17 @@ def main(self): a2 = self.entry(self.array) a3 = self.entry(a2) + class ImmutableFilterAK(ImmutableFilter): entry = staticmethod(immutable_filter_ak) + class ImmutableFilterREF(ImmutableFilter): entry = staticmethod(immutable_filter_ref) -#------------------------------------------------------------------------------- -class NameFilter(Perf): +# ------------------------------------------------------------------------------- +class NameFilter(Perf): def __init__(self): self.name1 = ('foo', None, ['bar']) self.name2 = 'foo' @@ -329,15 +350,17 @@ def main(self): pass self.entry(self.name2) + class NameFilterAK(NameFilter): entry = staticmethod(name_filter_ak) + class NameFilterREF(NameFilter): entry = staticmethod(name_filter_ref) -#------------------------------------------------------------------------------- -class ShapeFilter(Perf): +# ------------------------------------------------------------------------------- +class ShapeFilter(Perf): def __init__(self): self.array1 = np.arange(100) self.array2 = self.array1.reshape(20, 5) @@ -346,15 +369,17 @@ def main(self): self.entry(self.array1) self.entry(self.array2) + class ShapeFilterAK(ShapeFilter): entry = staticmethod(shape_filter_ak) + class ShapeFilterREF(ShapeFilter): entry = staticmethod(shape_filter_ref) -#------------------------------------------------------------------------------- -class Column2DFilter(Perf): +# ------------------------------------------------------------------------------- +class Column2DFilter(Perf): def __init__(self): self.array1 = np.arange(100) self.array2 = self.array1.reshape(20, 5) @@ -363,16 +388,17 @@ def main(self): self.entry(self.array1) self.entry(self.array2) + class Column2DFilterAK(Column2DFilter): entry = staticmethod(column_2d_filter_ak) + class Column2DFilterREF(Column2DFilter): entry = staticmethod(column_2d_filter_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class Column1DFilter(Perf): - def __init__(self): self.array1 = np.arange(100) self.array2 = self.array1.reshape(100, 1) @@ -381,15 +407,17 @@ def main(self): self.entry(self.array1) self.entry(self.array2) + class Column1DFilterAK(Column1DFilter): entry = staticmethod(column_1d_filter_ak) + class Column1DFilterREF(Column1DFilter): entry = staticmethod(column_1d_filter_ref) -#------------------------------------------------------------------------------- -class Row1DFilter(Perf): +# ------------------------------------------------------------------------------- +class Row1DFilter(Perf): def __init__(self): self.array1 = np.arange(100) self.array2 = self.array1.reshape(1, 100) @@ -398,16 +426,17 @@ def main(self): self.entry(self.array1) self.entry(self.array2) + class Row1DFilterAK(Row1DFilter): entry = staticmethod(row_1d_filter_ak) + class Row1DFilterREF(Row1DFilter): entry = staticmethod(row_1d_filter_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class ResolveDType(Perf): - def __init__(self): self.dtype1 = np.arange(100).dtype self.dtype2 = np.array(('a', 'b')).dtype @@ -415,26 +444,25 @@ def __init__(self): def main(self): self.entry(self.dtype1, self.dtype2) + class ResolveDTypeAK(ResolveDType): entry = staticmethod(resolve_dtype_ak) + class ResolveDTypeREF(ResolveDType): entry = staticmethod(resolve_dtype_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class ResolveDTypeIter(Perf): - FUNCTIONS = ('iter10', 'iter100000') NUMBER = 500 def __init__(self): self.dtypes10 = [np.dtype(int)] * 9 + [np.dtype(float)] self.dtypes100000 = ( - [np.dtype(int)] * 50000 + - [np.dtype(float)] * 49999 + - [np.dtype(bool)] - ) + [np.dtype(int)] * 50000 + [np.dtype(float)] * 49999 + [np.dtype(bool)] + ) def iter10(self): self.entry(self.dtypes10) @@ -442,14 +470,16 @@ def iter10(self): def iter100000(self): self.entry(self.dtypes100000) + class ResolveDTypeIterAK(ResolveDTypeIter): entry = staticmethod(resolve_dtype_iter_ak) + class ResolveDTypeIterREF(ResolveDTypeIter): entry = staticmethod(resolve_dtype_iter_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class ArrayDeepcopy(Perf): FUNCTIONS = ('memo_new', 'memo_shared') NUMBER = 500 @@ -457,7 +487,7 @@ class ArrayDeepcopy(Perf): def __init__(self): self.array1 = np.arange(100_000) self.array2 = np.full(100_000, None) - self.array2[0] = [np.nan] # add a mutable + self.array2[0] = [np.nan] # add a mutable self.memo = {} def memo_new(self): @@ -469,14 +499,16 @@ def memo_shared(self): self.entry(self.array1, self.memo) self.entry(self.array2, self.memo) + class ArrayDeepcopyAK(ArrayDeepcopy): entry = staticmethod(array_deepcopy_ak) + class ArrayDeepcopyREF(ArrayDeepcopy): entry = staticmethod(array_deepcopy_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class ArrayGOPerf(Perf): NUMBER = 500 @@ -490,14 +522,16 @@ def main(self): if i % 50: _ = ag.values + class ArrayGOPerfAK(ArrayGOPerf): entry = staticmethod(ArrayGOAK) + class ArrayGOPerfREF(ArrayGOPerf): entry = staticmethod(ArrayGOREF) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class DtypeFromElementPerf(Perf): NUMBER = 1000 @@ -505,14 +539,41 @@ def __init__(self): NT = namedtuple('NT', tuple('abc')) self.values = [ - np.longlong(-1), np.int_(-1), np.intc(-1), np.short(-1), np.byte(-1), - np.ubyte(1), np.ushort(1), np.uintc(1), np.uint(1), np.ulonglong(1), - np.half(1.0), np.single(1.0), np.float64(1.0), np.longdouble(1.0), - np.csingle(1.0j), np.complex_(1.0j), np.clongdouble(1.0j), - np.bool_(0), np.str_('1'), np.str_('1'), np.void(1), - np.object(), np.datetime64('NaT'), np.timedelta64('NaT'), np.nan, - 12, 12.0, True, None, float('NaN'), object(), (1, 2, 3), - NT(1, 2, 3), datetime.date(2020, 12, 31), datetime.timedelta(14), + np.longlong(-1), + np.int_(-1), + np.intc(-1), + np.short(-1), + np.byte(-1), + np.ubyte(1), + np.ushort(1), + np.uintc(1), + np.uint(1), + np.ulonglong(1), + np.half(1.0), + np.single(1.0), + np.float64(1.0), + np.longdouble(1.0), + np.csingle(1.0j), + np.complex_(1.0j), + np.clongdouble(1.0j), + np.bool_(0), + np.str_('1'), + np.str_('1'), + np.void(1), + np.object(), + np.datetime64('NaT'), + np.timedelta64('NaT'), + np.nan, + 12, + 12.0, + True, + None, + float('NaN'), + object(), + (1, 2, 3), + NT(1, 2, 3), + datetime.date(2020, 12, 31), + datetime.timedelta(14), ] # Datetime & Timedelta @@ -529,35 +590,47 @@ def main(self): for val in self.values: self.entry(val) + class DtypeFromElementPerfAK(DtypeFromElementPerf): entry = staticmethod(dtype_from_element_ak) + class DtypeFromElementPerfREF(DtypeFromElementPerf): entry = staticmethod(dtype_from_element_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class IsNaElementPerf(Perf): NUMBER = 1000 def __init__(self): - class FloatSubclass(float): pass - class ComplexSubclass(complex): pass + class FloatSubclass(float): + pass - self.values = [ - # Na-elements - np.datetime64('NaT'), np.timedelta64('NaT'), None, float('NaN'), -float('NaN'), + class ComplexSubclass(complex): + pass - # Non-float, Non-na elements - 1, 'str', np.datetime64('2020-12-31'), datetime.date(2020, 12, 31), False, + self.values = [ + # Na-elements + np.datetime64('NaT'), + np.timedelta64('NaT'), + None, + float('NaN'), + -float('NaN'), + # Non-float, Non-na elements + 1, + 'str', + np.datetime64('2020-12-31'), + datetime.date(2020, 12, 31), + False, ] nan = np.nan complex_nans = [ - complex(nan, 0), - complex(-nan, 0), - complex(0, nan), - complex(0, -nan), + complex(nan, 0), + complex(-nan, 0), + complex(0, nan), + complex(0, -nan), ] float_classes = [float, np.float16, np.float32, np.float64, FloatSubclass] @@ -579,8 +652,20 @@ class ComplexSubclass(complex): pass # Append a wide range of float values, with different precision, across types for val in ( - 1e-1000, 1e-309, 1e-39, 1e-16, 1e-5, 0.1, 0., 1.0, 1e5, 1e16, 1e39, 1e309, 1e1000, - ): + 1e-1000, + 1e-309, + 1e-39, + 1e-16, + 1e-5, + 0.1, + 0.0, + 1.0, + 1e5, + 1e16, + 1e39, + 1e309, + 1e1000, + ): for ctor in float_classes: self.values.append(ctor(val)) self.values.append(ctor(-val)) @@ -596,31 +681,33 @@ def main(self): for val in self.values: self.entry(val) + class IsNaElementPerfAK(IsNaElementPerf): entry = staticmethod(isna_element_ak) + class IsNaElementPerfREF(IsNaElementPerf): entry = staticmethod(isna_element_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class GetNewIndexersAndScreenPerf(Perf): FUNCTIONS = ( - "ordered", - "unordered", - "tiled", - "repeat", - "quick_exit", - "late_exit", - "small", - "large", + 'ordered', + 'unordered', + 'tiled', + 'repeat', + 'quick_exit', + 'late_exit', + 'small', + 'large', ) NUMBER = 5 - TILED = "tiled" - REPEATED = "repeated" - ORDERED = "ordered" - UNORDERED = "unordered" + TILED = 'tiled' + REPEATED = 'repeated' + ORDERED = 'ordered' + UNORDERED = 'unordered' class Key(tp.NamedTuple): type1: str @@ -700,9 +787,7 @@ class GetNewIndexersAndScreenPerfREF(GetNewIndexersAndScreenPerf): entry = staticmethod(get_new_indexers_and_screen_ref) - - -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class SplitAfterCount(Perf): NUMBER = 200_000 @@ -712,14 +797,16 @@ def __init__(self): def main(self): post = self.entry(self.string, ',', 20) + class SplitAfterCountAK(SplitAfterCount): entry = staticmethod(split_after_count_ak) + class SplitAfterCountREF(SplitAfterCount): entry = staticmethod(split_after_count_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class CountIterations(Perf): NUMBER = 10_000 @@ -730,14 +817,16 @@ def main(self): post = self.entry(self.strio) self.strio.seek(0) + class CountIterationsAK(CountIterations): entry = staticmethod(count_iteration_ak) + class CountIterationsREF(CountIterations): entry = staticmethod(count_iteration_ref) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- class SliceToAscending(Perf): NUMBER = 1_000_000 @@ -747,30 +836,35 @@ def __init__(self): def main(self): _ = self.entry(self.slc, 101) + class SliceToAscendingAK(SliceToAscending): entry = staticmethod(slice_to_ascending_slice_ak) + class SliceToAscendingREF(SliceToAscending): entry = staticmethod(slice_to_ascending_slice_ref) -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- + def get_arg_parser(): p = argparse.ArgumentParser( description='ArrayKit performance tool.', - ) - p.add_argument("--names", - nargs='+', - help='Provide one or more performance tests by name.') + ) + p.add_argument( + '--names', nargs='+', help='Provide one or more performance tests by name.' + ) return p + def main(): options = get_arg_parser().parse_args() match = None if not options.names else set(options.names) records = [('cls', 'func', 'ak', 'ref', 'ref/ak')] - for cls_perf in Perf.__subclasses__(): # only get one level + for cls_perf in Perf.__subclasses__(): # only get one level cls_map = {} if match and cls_perf.__name__ not in match: continue @@ -784,15 +878,24 @@ def main(): results = {} for key, cls_runner in cls_map.items(): runner = cls_runner() - if hasattr(runner, 'pre'): #TEMP, for branches + if hasattr(runner, 'pre'): # TEMP, for branches raise RuntimeError('convert your pre() method to __init__()') f = getattr(runner, func_attr) - results[key] = timeit.timeit('f()', - globals=locals(), - number=cls_runner.NUMBER) - records.append((cls_perf.__name__, func_attr, results['ak'], results['ref'], results['ref'] / results['ak'])) + results[key] = timeit.timeit( + 'f()', globals=locals(), number=cls_runner.NUMBER + ) + records.append( + ( + cls_perf.__name__, + func_attr, + results['ak'], + results['ref'], + results['ref'] / results['ak'], + ) + ) + + import pandas as pd # NOTE: cannot make StaticFrame a dependency - import pandas as pd # NOTE: cannot make StaticFrame a dependency riter = iter(records) columns = next(riter) f = pd.DataFrame.from_records(riter, columns=columns) @@ -804,5 +907,6 @@ def main(): # (r.ljust(width) if isinstance(r, str) else str(round(r, 8)).ljust(width)) for r in record # )) + if __name__ == '__main__': main() diff --git a/performance/auto_map/fixtures.py b/performance/auto_map/fixtures.py index e098a37e..f73c6ce3 100644 --- a/performance/auto_map/fixtures.py +++ b/performance/auto_map/fixtures.py @@ -24,7 +24,7 @@ def __init__(self, array: np.ndarray): class FixtureFactory: - NAME = "" + NAME = '' SORT = 0 CACHE = {} # can be shared for all classes @@ -42,7 +42,7 @@ def get_label_array(cls, size: int) -> tp.Tuple[str, PayLoad]: class FFInt64(FixtureFactory): - NAME = "int64" + NAME = 'int64' SORT = 0 @staticmethod @@ -53,7 +53,7 @@ def get_array(size: int) -> np.ndarray: class FFInt32(FixtureFactory): - NAME = "int32" + NAME = 'int32' SORT = 1 @staticmethod @@ -64,7 +64,7 @@ def get_array(size: int) -> np.ndarray: class FFUInt64(FixtureFactory): - NAME = "uint64" + NAME = 'uint64' SORT = 2 @staticmethod @@ -75,7 +75,7 @@ def get_array(size: int) -> np.ndarray: class FFUInt32(FixtureFactory): - NAME = "uint32" + NAME = 'uint32' SORT = 3 @staticmethod @@ -86,7 +86,7 @@ def get_array(size: int) -> np.ndarray: class FFFloat64(FixtureFactory): - NAME = "float64" + NAME = 'float64' SORT = 4 @staticmethod @@ -97,7 +97,7 @@ def get_array(size: int) -> np.ndarray: class FFFloat32(FixtureFactory): - NAME = "float32" + NAME = 'float32' SORT = 5 @staticmethod @@ -108,154 +108,154 @@ def get_array(size: int) -> np.ndarray: def get_string_array(size: int, char_count: int, kind: str) -> str: - fmt = f"-<{char_count}" + fmt = f'-<{char_count}' array = np.array( [ - f"{hex(e) * (char_count // 8)}".format(fmt) + f'{hex(e) * (char_count // 8)}'.format(fmt) for e in range(INT_START, INT_START + size) ], - dtype=f"{kind}{char_count}", + dtype=f'{kind}{char_count}', ) array.flags.writeable = False return array class FFU8(FixtureFactory): - NAME = "U8" + NAME = 'U8' SORT = 6 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 8, "U") + return get_string_array(size, 8, 'U') class FFU16(FixtureFactory): - NAME = "U16" + NAME = 'U16' SORT = 7 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 16, "U") + return get_string_array(size, 16, 'U') class FFU32(FixtureFactory): - NAME = "U32" + NAME = 'U32' SORT = 8 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 32, "U") + return get_string_array(size, 32, 'U') class FFU64(FixtureFactory): - NAME = "U64" + NAME = 'U64' SORT = 9 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 64, "U") + return get_string_array(size, 64, 'U') class FFU128(FixtureFactory): - NAME = "U128" + NAME = 'U128' SORT = 10 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 128, "U") + return get_string_array(size, 128, 'U') class FFS8(FixtureFactory): - NAME = "S8" + NAME = 'S8' SORT = 11 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 8, "S") + return get_string_array(size, 8, 'S') class FFS16(FixtureFactory): - NAME = "S16" + NAME = 'S16' SORT = 12 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 16, "S") + return get_string_array(size, 16, 'S') class FFS32(FixtureFactory): - NAME = "S32" + NAME = 'S32' SORT = 13 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 32, "S") + return get_string_array(size, 32, 'S') class FFS64(FixtureFactory): - NAME = "S64" + NAME = 'S64' SORT = 14 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 64, "S") + return get_string_array(size, 64, 'S') class FFS128(FixtureFactory): - NAME = "S128" + NAME = 'S128' SORT = 15 @staticmethod def get_array(size: int) -> np.ndarray: - return get_string_array(size, 128, "S") + return get_string_array(size, 128, 'S') class FFDTY(FixtureFactory): - NAME = "dt[Y]" + NAME = 'dt[Y]' SORT = 20 @staticmethod def get_array(size: int) -> np.ndarray: - array = np.arange(INT_START, INT_START + size, dtype="datetime64[Y]") + array = np.arange(INT_START, INT_START + size, dtype='datetime64[Y]') array.flags.writeable = False return array class FFDTD(FixtureFactory): - NAME = "dt[D]" + NAME = 'dt[D]' SORT = 21 @staticmethod def get_array(size: int) -> np.ndarray: - array = np.arange(INT_START, INT_START + size, dtype="datetime64[D]") + array = np.arange(INT_START, INT_START + size, dtype='datetime64[D]') array.flags.writeable = False return array class FFDTs(FixtureFactory): - NAME = "dt[s]" + NAME = 'dt[s]' SORT = 22 @staticmethod def get_array(size: int) -> np.ndarray: - array = np.arange(INT_START, INT_START + size, dtype="datetime64[s]") + array = np.arange(INT_START, INT_START + size, dtype='datetime64[s]') array.flags.writeable = False return array class FFDTns(FixtureFactory): - NAME = "dt[ns]" + NAME = 'dt[ns]' SORT = 23 @staticmethod def get_array(size: int) -> np.ndarray: - array = np.arange(INT_START, INT_START + size, dtype="datetime64[ns]") + array = np.arange(INT_START, INT_START + size, dtype='datetime64[ns]') array.flags.writeable = False return array class FFObject(FixtureFactory): - NAME = "object" + NAME = 'object' SORT = 5 @staticmethod diff --git a/performance/auto_map/get-all-any.py b/performance/auto_map/get-all-any.py index bc6d48b6..ce670d7d 100644 --- a/performance/auto_map/get-all-any.py +++ b/performance/auto_map/get-all-any.py @@ -42,7 +42,7 @@ class MapProcessor: - NAME = "" + NAME = '' SORT = -1 def __init__(self, pl: PayLoad): @@ -54,7 +54,7 @@ def __init__(self, pl: PayLoad): # ------------------------------------------------------------------------------- class ListCompAllScalar(MapProcessor): - NAME = "all: list comp, lookup by scalar" + NAME = 'all: list comp, lookup by scalar' SORT = 0 def __call__(self): @@ -72,7 +72,7 @@ def __call__(self): class GetAllListScalar(MapProcessor): - NAME = "all: get all, lookup by scalar list" + NAME = 'all: get all, lookup by scalar list' SORT = 0 def __call__(self): @@ -81,7 +81,7 @@ def __call__(self): class GetAllArray(MapProcessor): - NAME = "all: get all, lookup by array" + NAME = 'all: get all, lookup by array' SORT = 0 def __call__(self): @@ -91,7 +91,7 @@ def __call__(self): # ------------------------------------------------------------------------------- class ListCompAnyScalar(MapProcessor): - NAME = "any: list comp, lookup by scalar" + NAME = 'any: list comp, lookup by scalar' SORT = 0 def __call__(self): @@ -109,7 +109,7 @@ def __call__(self): class GetAnyListScalar(MapProcessor): - NAME = "any: get all, lookup by scalar list" + NAME = 'any: get all, lookup by scalar list' SORT = 0 def __call__(self): @@ -118,7 +118,7 @@ def __call__(self): class GetAnyArray(MapProcessor): - NAME = "any: get all, lookup by array" + NAME = 'any: get all, lookup by array' SORT = 0 def __call__(self): @@ -132,7 +132,7 @@ def __call__(self): def get_versions() -> str: import platform - return f"OS: {platform.system()} / ArrayMap: {arraymap.__version__} / NumPy: {np.__version__}\n" + return f'OS: {platform.system()} / ArrayMap: {arraymap.__version__} / NumPy: {np.__version__}\n' CLS_FF = ( @@ -157,52 +157,51 @@ def get_versions() -> str: def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: - return f"{seconds * 1e6: .1f} (µs)" + return f'{seconds * 1e6: .1f} (µs)' if seconds < 1e-1: - return f"{seconds * 1e3: .1f} (ms)" - return f"{seconds: .1f} (s)" + return f'{seconds * 1e3: .1f} (ms)' + return f'{seconds: .1f} (s)' -def plot_performance(frame, suffix: str = ""): - fixture_total = len(frame["fixture"].unique()) - cat_total = len(frame["size"].unique()) - processor_total = len(frame["cls_processor"].unique()) +def plot_performance(frame, suffix: str = ''): + fixture_total = len(frame['fixture'].unique()) + cat_total = len(frame['size'].unique()) + processor_total = len(frame['cls_processor'].unique()) fig, axes = plt.subplots(cat_total, fixture_total) # cmap = plt.get_cmap('terrain') - cmap = plt.get_cmap("plasma") + cmap = plt.get_cmap('plasma') color = cmap(np.arange(processor_total) / processor_total) # category is the size of the array - for cat_count, (cat_label, cat) in enumerate(frame.groupby("size")): - + for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): # fixture is the data type fixture - fixture_data = {fix_label: fix for fix_label, fix in cat.groupby("fixture")} + fixture_data = {fix_label: fix for fix_label, fix in cat.groupby('fixture')} for fixture_count, fixture_label in enumerate(FF_ORDER): fixture = fixture_data[fixture_label] ax = axes[cat_count][fixture_count] # set order by cls_processor, i.e., the type of test being done - fixture["sort"] = [f.SORT for f in fixture["cls_processor"]] - fixture = fixture.sort_values("sort") + fixture['sort'] = [f.SORT for f in fixture['cls_processor']] + fixture = fixture.sort_values('sort') - results = fixture["time"].values.tolist() - names = [cls.NAME for cls in fixture["cls_processor"]] + results = fixture['time'].values.tolist() + names = [cls.NAME for cls in fixture['cls_processor']] # x = np.arange(len(results)) names_display = names post = ax.bar(names_display, results, color=color) # density, position = fixture_label.split('-') # cat_label is the size of the array - title = f"{cat_label:.0e}\n{fixture_label}" + title = f'{cat_label:.0e}\n{fixture_label}' ax.set_title(title, fontsize=6) ax.set_box_aspect(0.8) - time_max = fixture["time"].max() - time_min = fixture["time"].min() + time_max = fixture['time'].max() + time_min = fixture['time'].min() y_ticks = [0, time_min, time_max * 0.5, time_max] y_labels = [ - "", + '', seconds_to_display(time_min), seconds_to_display(time_max * 0.5), seconds_to_display(time_max), @@ -216,24 +215,24 @@ def plot_performance(frame, suffix: str = ""): ax.set_yticklabels(y_labels, fontsize=4) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis="x", + axis='x', bottom=False, labelbottom=False, ) ax.tick_params( - axis="y", + axis='y', length=2, width=0.5, pad=1, ) fig.set_size_inches(9, 4) # width, height - fig.legend(post, names_display, loc="center right", fontsize=6) + fig.legend(post, names_display, loc='center right', fontsize=6) # horizontal, vertical - fig.text(0.05, 0.96, f"AutoMap {suffix.title()}: {NUMBER} Iterations", fontsize=10) + fig.text(0.05, 0.96, f'AutoMap {suffix.title()}: {NUMBER} Iterations', fontsize=10) fig.text(0.05, 0.90, get_versions(), fontsize=6) - fp = f"/tmp/arraymap-{suffix}.png" + fp = f'/tmp/arraymap-{suffix}.png' plt.subplots_adjust( left=0.075, bottom=0.05, @@ -246,10 +245,10 @@ def plot_performance(frame, suffix: str = ""): print(fp) plt.savefig(fp, dpi=300) - if sys.platform.startswith("linux"): - os.system(f"eog {fp}&") + if sys.platform.startswith('linux'): + os.system(f'eog {fp}&') else: - os.system(f"open {fp}") + os.system(f'open {fp}') def run_test(processors, suffix): @@ -263,7 +262,7 @@ def run_test(processors, suffix): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit(f"runner()", globals=locals(), number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -272,14 +271,13 @@ def run_test(processors, suffix): records.append(record) f = pd.DataFrame.from_records( - records, columns=("cls_processor", "number", "fixture", "size", "time") + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') ) print(f) plot_performance(f, suffix) -if __name__ == "__main__": - +if __name__ == '__main__': cls_instantiate = ( ListCompAllScalar, # GetAllListObj, @@ -291,4 +289,4 @@ def run_test(processors, suffix): GetAnyArray, ) - run_test(cls_instantiate, "get-all-any") + run_test(cls_instantiate, 'get-all-any') diff --git a/performance/auto_map/npy-opt.py b/performance/auto_map/npy-opt.py index b7c152f0..3d7478a5 100644 --- a/performance/auto_map/npy-opt.py +++ b/performance/auto_map/npy-opt.py @@ -29,7 +29,7 @@ class MapProcessor: - NAME = "" + NAME = '' SORT = -1 def __init__(self, pl: PayLoad): @@ -43,7 +43,7 @@ def __init__(self, pl: PayLoad): # ------------------------------------------------------------------------------- class FAMLInstantiate(MapProcessor): - NAME = "FAM(L): instantiate" + NAME = 'FAM(L): instantiate' SORT = 0 def __call__(self): @@ -52,7 +52,7 @@ def __call__(self): class AMAInstantiate(MapProcessor): - NAME = "AM(A): instantiate" + NAME = 'AM(A): instantiate' SORT = 0 def __call__(self): @@ -61,7 +61,7 @@ def __call__(self): class FAMAInstantiate(MapProcessor): - NAME = "FAM(A): instantiate" + NAME = 'FAM(A): instantiate' SORT = 0 def __call__(self): @@ -70,7 +70,7 @@ def __call__(self): class FAMAtolistInstantiate(MapProcessor): - NAME = "FAM(Atolist): instantiate" + NAME = 'FAM(Atolist): instantiate' SORT = 0 def __call__(self): @@ -79,7 +79,7 @@ def __call__(self): class DictInstantiate(MapProcessor): - NAME = "Dict: instantiate" + NAME = 'Dict: instantiate' SORT = 0 def __call__(self): @@ -89,7 +89,7 @@ def __call__(self): # ------------------------------------------------------------------------------- class FAMLLookup(MapProcessor): - NAME = "FAM(L): lookup" + NAME = 'FAM(L): lookup' SORT = 0 def __call__(self): @@ -99,7 +99,7 @@ def __call__(self): class FAMALookup(MapProcessor): - NAME = "FAM(A): lookup" + NAME = 'FAM(A): lookup' SORT = 0 def __call__(self): @@ -109,7 +109,7 @@ def __call__(self): class DictLookup(MapProcessor): - NAME = "Dict: lookup" + NAME = 'Dict: lookup' SORT = 0 def __call__(self): @@ -120,7 +120,7 @@ def __call__(self): # ------------------------------------------------------------------------------- class FAMLLookupScalar(MapProcessor): - NAME = "FAM(L): lookup scalar" + NAME = 'FAM(L): lookup scalar' SORT = 0 def __call__(self): @@ -130,7 +130,7 @@ def __call__(self): class FAMALookupScalar(MapProcessor): - NAME = "FAM(A): lookup scalar" + NAME = 'FAM(A): lookup scalar' SORT = 0 def __call__(self): @@ -140,7 +140,7 @@ def __call__(self): class AMALookupScalar(MapProcessor): - NAME = "AM(A): lookup scalar" + NAME = 'AM(A): lookup scalar' SORT = 0 def __call__(self): @@ -150,7 +150,7 @@ def __call__(self): class DictLookupScalar(MapProcessor): - NAME = "Dict: lookup scalar" + NAME = 'Dict: lookup scalar' SORT = 0 def __call__(self): @@ -161,7 +161,7 @@ def __call__(self): # ------------------------------------------------------------------------------- class FAMLNotIn(MapProcessor): - NAME = "FAM(L): not in" + NAME = 'FAM(L): not in' SORT = 0 def __call__(self): @@ -171,7 +171,7 @@ def __call__(self): class FAMANotIn(MapProcessor): - NAME = "FAM(A): not in" + NAME = 'FAM(A): not in' SORT = 0 def __call__(self): @@ -181,7 +181,7 @@ def __call__(self): class AMANotIn(MapProcessor): - NAME = "AM(A): not in" + NAME = 'AM(A): not in' SORT = 0 def __call__(self): @@ -191,7 +191,7 @@ def __call__(self): class DictNotIn(MapProcessor): - NAME = "Dict: not in" + NAME = 'Dict: not in' SORT = 0 def __call__(self): @@ -202,7 +202,7 @@ def __call__(self): # ------------------------------------------------------------------------------- class FAMLKeys(MapProcessor): - NAME = "FAM(L): keys" + NAME = 'FAM(L): keys' SORT = 0 def __call__(self): @@ -211,7 +211,7 @@ def __call__(self): class FAMAKeys(MapProcessor): - NAME = "FAM(A): keys" + NAME = 'FAM(A): keys' SORT = 0 def __call__(self): @@ -220,7 +220,7 @@ def __call__(self): class DictKeys(MapProcessor): - NAME = "Dict: keys" + NAME = 'Dict: keys' SORT = 0 def __call__(self): @@ -230,7 +230,7 @@ def __call__(self): # ------------------------------------------------------------------------------- class FAMLItems(MapProcessor): - NAME = "FAM(L): items" + NAME = 'FAM(L): items' SORT = 0 def __call__(self): @@ -239,7 +239,7 @@ def __call__(self): class FAMAItems(MapProcessor): - NAME = "FAM(A): items" + NAME = 'FAM(A): items' SORT = 0 def __call__(self): @@ -248,7 +248,7 @@ def __call__(self): class DictItems(MapProcessor): - NAME = "Dict: items" + NAME = 'Dict: items' SORT = 0 def __call__(self): @@ -259,7 +259,7 @@ def __call__(self): def get_versions() -> str: import platform - return f"OS: {platform.system()} / ArrayMap: {arraymap.__version__} / NumPy: {np.__version__}\n" + return f'OS: {platform.system()} / ArrayMap: {arraymap.__version__} / NumPy: {np.__version__}\n' CLS_FF = ( @@ -293,52 +293,51 @@ def get_versions() -> str: def seconds_to_display(seconds: float) -> str: seconds /= NUMBER if seconds < 1e-4: - return f"{seconds * 1e6: .1f} (µs)" + return f'{seconds * 1e6: .1f} (µs)' if seconds < 1e-1: - return f"{seconds * 1e3: .1f} (ms)" - return f"{seconds: .1f} (s)" + return f'{seconds * 1e3: .1f} (ms)' + return f'{seconds: .1f} (s)' -def plot_performance(frame, suffix: str = ""): - fixture_total = len(frame["fixture"].unique()) - cat_total = len(frame["size"].unique()) - processor_total = len(frame["cls_processor"].unique()) +def plot_performance(frame, suffix: str = ''): + fixture_total = len(frame['fixture'].unique()) + cat_total = len(frame['size'].unique()) + processor_total = len(frame['cls_processor'].unique()) fig, axes = plt.subplots(cat_total, fixture_total) # cmap = plt.get_cmap('terrain') - cmap = plt.get_cmap("plasma") + cmap = plt.get_cmap('plasma') color = cmap(np.arange(processor_total) / processor_total) # category is the size of the array - for cat_count, (cat_label, cat) in enumerate(frame.groupby("size")): - + for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): # fixture is the data type fixture - fixture_data = {fix_label: fix for fix_label, fix in cat.groupby("fixture")} + fixture_data = {fix_label: fix for fix_label, fix in cat.groupby('fixture')} for fixture_count, fixture_label in enumerate(FF_ORDER): fixture = fixture_data[fixture_label] ax = axes[cat_count][fixture_count] # set order by cls_processor, i.e., the type of test being done - fixture["sort"] = [f.SORT for f in fixture["cls_processor"]] - fixture = fixture.sort_values("sort") + fixture['sort'] = [f.SORT for f in fixture['cls_processor']] + fixture = fixture.sort_values('sort') - results = fixture["time"].values.tolist() - names = [cls.NAME for cls in fixture["cls_processor"]] + results = fixture['time'].values.tolist() + names = [cls.NAME for cls in fixture['cls_processor']] # x = np.arange(len(results)) names_display = names post = ax.bar(names_display, results, color=color) # density, position = fixture_label.split('-') # cat_label is the size of the array - title = f"{cat_label:.0e}\n{fixture_label}" + title = f'{cat_label:.0e}\n{fixture_label}' ax.set_title(title, fontsize=6) ax.set_box_aspect(0.8) - time_max = fixture["time"].max() - time_min = fixture["time"].min() + time_max = fixture['time'].max() + time_min = fixture['time'].min() y_ticks = [0, time_min, time_max * 0.5, time_max] y_labels = [ - "", + '', seconds_to_display(time_min), seconds_to_display(time_max * 0.5), seconds_to_display(time_max), @@ -352,23 +351,23 @@ def plot_performance(frame, suffix: str = ""): ax.set_yticklabels(y_labels, fontsize=4) # ax.set_xticks(x, names_display, rotation='vertical') ax.tick_params( - axis="x", + axis='x', bottom=False, labelbottom=False, ) ax.tick_params( - axis="y", + axis='y', length=2, width=0.5, pad=1, ) fig.set_size_inches(9, 3) # width, height - fig.legend(post, names_display, loc="center right", fontsize=6) + fig.legend(post, names_display, loc='center right', fontsize=6) # horizontal, vertical - fig.text(0.05, 0.96, f"AutoMap {suffix.title()}: {NUMBER} Iterations", fontsize=10) + fig.text(0.05, 0.96, f'AutoMap {suffix.title()}: {NUMBER} Iterations', fontsize=10) fig.text(0.05, 0.90, get_versions(), fontsize=6) - fp = f"/tmp/arraymap-{suffix}.png" + fp = f'/tmp/arraymap-{suffix}.png' plt.subplots_adjust( left=0.075, bottom=0.05, @@ -380,10 +379,10 @@ def plot_performance(frame, suffix: str = ""): # plt.rcParams.update({'font.size': 22}) plt.savefig(fp, dpi=300) - if sys.platform.startswith("linux"): - os.system(f"eog {fp}&") + if sys.platform.startswith('linux'): + os.system(f'eog {fp}&') else: - os.system(f"open {fp}") + os.system(f'open {fp}') def run_test(processors, suffix): @@ -397,7 +396,7 @@ def run_test(processors, suffix): record = [cls, NUMBER, fixture_label, size] print(record) try: - result = timeit.timeit(f"runner()", globals=locals(), number=NUMBER) + result = timeit.timeit(f'runner()', globals=locals(), number=NUMBER) except OSError: result = np.nan finally: @@ -406,14 +405,13 @@ def run_test(processors, suffix): records.append(record) f = pd.DataFrame.from_records( - records, columns=("cls_processor", "number", "fixture", "size", "time") + records, columns=('cls_processor', 'number', 'fixture', 'size', 'time') ) print(f) plot_performance(f, suffix) -if __name__ == "__main__": - +if __name__ == '__main__': CLS_PROCESSOR = ( FAMLInstantiate, FAMAInstantiate, @@ -451,5 +449,5 @@ def run_test(processors, suffix): # DictNotIn, ) - run_test(cls_instantiate, "instantiate") - run_test(cls_lookup, "lookup") + run_test(cls_instantiate, 'instantiate') + run_test(cls_lookup, 'lookup') diff --git a/performance/reference/array_go.py b/performance/reference/array_go.py index c2b8d502..ecc566c5 100644 --- a/performance/reference/array_go.py +++ b/performance/reference/array_go.py @@ -1,4 +1,3 @@ - import typing as tp import numpy as np @@ -7,31 +6,35 @@ from performance.reference.util import DTYPE_OBJECT from performance.reference.util import array_deepcopy + class ArrayGO: - ''' + """ A grow only, one-dimensional, object type array, specifically for usage in IndexHierarchy IndexLevel objects. - ''' - _DTYPE = DTYPE_OBJECT # only object arrays are supported + """ + + _DTYPE = DTYPE_OBJECT # only object arrays are supported _array: tp.Optional[np.ndarray] _array_mutable: tp.Optional[tp.List[tp.Any]] __slots__ = ( - '_array', - '_array_mutable', - '_recache', - ) + '_array', + '_array_mutable', + '_recache', + ) # NOTE: this can be implemented with one array, where we overallocate for growth, then grow as needed, or with an array and list. Since most instaces will not need to grow (only edge nodes), overall efficiency might be greater with a list - def __init__(self, - iterable: tp.Union[np.ndarray, tp.List[object]], - *, - own_iterable: bool = False) -> None: - ''' + def __init__( + self, + iterable: tp.Union[np.ndarray, tp.List[object]], + *, + own_iterable: bool = False, + ) -> None: + """ Args: own_iterable: flag iterable as ownable by this instance. - ''' + """ if isinstance(iterable, np.ndarray): if own_iterable: self._array = iterable @@ -42,7 +45,7 @@ def __init__(self, raise NotImplementedError('only object arrays are supported') self._recache = False self._array_mutable = None - else: # assume it is a list or listable + else: # assume it is a list or listable self._array = None self._recache = True # always call list to get new object, or realize a generator @@ -51,39 +54,35 @@ def __init__(self, else: self._array_mutable = list(iterable) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def __deepcopy__(self, memo: tp.Dict[int, tp.Any]) -> 'ArrayGO': if self._recache: self._update_array_cache() obj = self.__new__(self.__class__) obj._array = array_deepcopy(self._array, memo) - obj._array_mutable = None # after updating cache + obj._array_mutable = None # after updating cache obj._recache = False memo[id(self)] = obj - return obj #type: ignore + return obj # type: ignore def __copy__(self) -> 'ArrayGO': - '''Return a shallow copy of this ArrayGO. - ''' + """Return a shallow copy of this ArrayGO.""" if self._recache: self._update_array_cache() return self.__class__(self._array, own_iterable=True) def copy(self) -> 'ArrayGO': - '''Return a shallow copy of this ArrayGO. - ''' + """Return a shallow copy of this ArrayGO.""" return self.__copy__() - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def _update_array_cache(self) -> None: if self._array_mutable is not None: if self._array is not None: len_base = len(self._array) - array = np.empty( - len_base + len(self._array_mutable), - self._DTYPE) + array = np.empty(len_base + len(self._array_mutable), self._DTYPE) array[:len_base] = self._array array[len_base:] = self._array_mutable array.flags.writeable = False @@ -98,17 +97,17 @@ def _update_array_cache(self) -> None: def __iter__(self) -> tp.Iterator[tp.Any]: if self._recache: self._update_array_cache() - return iter(self._array) #type: ignore + return iter(self._array) # type: ignore def __getitem__(self, key: tp.Any) -> tp.Any: if self._recache: self._update_array_cache() - return self._array.__getitem__(key) #type: ignore + return self._array.__getitem__(key) # type: ignore def __len__(self) -> int: if self._recache: self._update_array_cache() - return len(self._array) #type: ignore + return len(self._array) # type: ignore def append(self, value: tp.Iterable[object]) -> None: if self._array_mutable is None: @@ -124,10 +123,7 @@ def extend(self, values: tp.Iterable[object]) -> None: @property def values(self) -> np.ndarray: - '''Return the immutable labels array - ''' + """Return the immutable labels array""" if self._recache: self._update_array_cache() return self._array - - diff --git a/performance/reference/block_index.py b/performance/reference/block_index.py index 0b516050..55bf8709 100644 --- a/performance/reference/block_index.py +++ b/performance/reference/block_index.py @@ -1,17 +1,16 @@ - from arraykit import shape_filter from arraykit import resolve_dtype import typing as tp import numpy as np -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- def from_blocks( - raw_blocks: tp.Iterable[np.ndarray], - ): - '''Simulation of legacy routine within TypeBlocks. - ''' - index: tp.List[tp.Tuple[int, int]] = [] # columns position to blocks key + raw_blocks: tp.Iterable[np.ndarray], +): + """Simulation of legacy routine within TypeBlocks.""" + index: tp.List[tp.Tuple[int, int]] = [] # columns position to blocks key block_count = 0 row_count = None column_count = 0 @@ -21,11 +20,13 @@ def from_blocks( if not block.__class__ is np.ndarray: raise ErrorInitTypeBlocks(f'found non array block: {block}') if block.ndim > 2: - raise ErrorInitTypeBlocks(f'cannot include array with {block.ndim} dimensions') + raise ErrorInitTypeBlocks( + f'cannot include array with {block.ndim} dimensions' + ) r, c = shape_filter(block) - if row_count is not None and r != row_count: #type: ignore [unreachable] + if row_count is not None and r != row_count: # type: ignore [unreachable] raise ErrorInitTypeBlocks(f'mismatched row count: {r}: {row_count}') else: row_count = r @@ -43,20 +44,21 @@ def from_blocks( block_count += 1 return (row_count, column_count), index -#------------------------------------------------------------------------------- + +# ------------------------------------------------------------------------------- def cols_to_slice(indices: tp.Sequence[int]) -> slice: - '''Translate an iterable of contiguous integers into a slice. -Integers are assumed to be ordered (ascending or descending) and contiguous. - ''' + """Translate an iterable of contiguous integers into a slice. + Integers are assumed to be ordered (ascending or descending) and contiguous. + """ start_idx = indices[0] # single column as a single slice if len(indices) == 1: return slice(start_idx, start_idx + 1) stop_idx = indices[-1] - if stop_idx > start_idx: # ascending indices + if stop_idx > start_idx: # ascending indices return slice(start_idx, stop_idx + 1) if stop_idx == 0: @@ -64,11 +66,13 @@ def cols_to_slice(indices: tp.Sequence[int]) -> slice: # stop is less than start, need to reduce by 1 to cover range return slice(start_idx, stop_idx - 1, -1) -def indices_to_contiguous_pairs(indices: tp.Iterable[tp.Tuple[int, int]] - ) -> tp.Iterator[tp.Tuple[int, slice]]: - '''Indices are pairs of (block_idx, value); convert these to pairs of (block_idx, slice) when we identify contiguous indices -within a block (these are block slices) - ''' + +def indices_to_contiguous_pairs( + indices: tp.Iterable[tp.Tuple[int, int]], +) -> tp.Iterator[tp.Tuple[int, slice]]: + """Indices are pairs of (block_idx, value); convert these to pairs of (block_idx, slice) when we identify contiguous indices + within a block (these are block slices) + """ # store pairs of block idx, ascending col list last: tp.Optional[tp.Tuple[int, int]] = None @@ -110,7 +114,7 @@ def build_slice(start, end_inclusive): # return start if start <= end_inclusive: - return slice(start, end_inclusive + 1, None) # can be 1 + return slice(start, end_inclusive + 1, None) # can be 1 # reverse slice if end_inclusive == 0: return slice(start, None, -1) @@ -120,13 +124,13 @@ def getter(self) -> tp.Tuple[int, slice]: slice_start = -1 while True: if self.next_block == -2: - return None # terminate the loop + return None # terminate the loop if self.next_block != -1: # discontinuity found on last iteration, set new start self.last_block = self.next_block self.last_column = self.next_column slice_start = self.last_column - self.next_block = -1 # clear next state + self.next_block = -1 # clear next state self.next_column = -1 try: @@ -144,7 +148,9 @@ def getter(self) -> tp.Tuple[int, slice]: slice_start = column continue - if self.last_block == block and abs(column - self.last_column) == 1: # contiguous + if ( + self.last_block == block and abs(column - self.last_column) == 1 + ): # contiguous self.last_column = column continue @@ -154,7 +160,6 @@ def getter(self) -> tp.Tuple[int, slice]: self.next_column = column return self.last_block, self.build_slice(slice_start, self.last_column) - def iter(self) -> tp.Iterator[tp.Tuple[int, slice]]: while True: post = self.getter() @@ -163,8 +168,8 @@ def iter(self) -> tp.Iterator[tp.Tuple[int, slice]]: else: break -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- if __name__ == '__main__': @@ -177,14 +182,12 @@ def iter(self) -> tp.Iterator[tp.Tuple[int, slice]]: [(0, 0), (2, 3), (2, 2), (2, 1), (2, 6), (10, 1)], [(2, 3), (0, 0), (2, 2), (2, 1), (2, 6), (2, 7)], [(2, 3), (2, 2), (5, 2), (5, 1), (5, 0), (2, 1), (2, 0)], - ) for sample in samples: p1 = list(indices_to_contiguous_pairs(sample)) print(sample) print(p1) - iterc = IterContiguous(sample) p2 = list(iterc.iter()) - print(p2) \ No newline at end of file + print(p2) diff --git a/performance/reference/util.py b/performance/reference/util.py index 055c583a..d5f84079 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -10,9 +10,12 @@ DTYPE_OBJECT_KIND = 'O' DTYPE_BOOL_KIND = 'b' -DTYPE_STR_KINDS = ('U', 'S') # S is np.bytes_ -DTYPE_INT_KINDS = ('i', 'u') # signed and unsigned -DTYPE_INEXACT_KINDS = (DTYPE_FLOAT_KIND, DTYPE_COMPLEX_KIND) # kinds that support NaN values +DTYPE_STR_KINDS = ('U', 'S') # S is np.bytes_ +DTYPE_INT_KINDS = ('i', 'u') # signed and unsigned +DTYPE_INEXACT_KINDS = ( + DTYPE_FLOAT_KIND, + DTYPE_COMPLEX_KIND, +) # kinds that support NaN values DTYPE_NAT_KINDS = (DTYPE_DATETIME_KIND, DTYPE_TIMEDELTA_KIND) DTYPE_OBJECT = np.dtype(object) @@ -25,29 +28,27 @@ DTYPES_BOOL = (DTYPE_BOOL,) DTYPES_INEXACT = (DTYPE_FLOAT_DEFAULT, DTYPE_COMPLEX_DEFAULT) -EMPTY_SLICE = slice(0, 0) # gathers nothing +EMPTY_SLICE = slice(0, 0) # gathers nothing def mloc(array: np.ndarray) -> int: - '''Return the memory location of an array. - ''' + """Return the memory location of an array.""" return tp.cast(int, array.__array_interface__['data'][0]) def immutable_filter(src_array: np.ndarray) -> np.ndarray: - '''Pass an immutable array; otherwise, return an immutable copy of the provided array. - ''' + """Pass an immutable array; otherwise, return an immutable copy of the provided array.""" if src_array.flags.writeable: dst_array = src_array.copy() dst_array.flags.writeable = False return dst_array - return src_array # keep it as is + return src_array # keep it as is def name_filter(name): - ''' + """ For name attributes on containers, only permit recursively hashable objects. - ''' + """ try: hash(name) except TypeError: @@ -56,19 +57,18 @@ def name_filter(name): def shape_filter(array: np.ndarray) -> tp.Tuple[int, int]: - '''Represent a 1D array as a 2D array with length as rows of a single-column array. + """Represent a 1D array as a 2D array with length as rows of a single-column array. Return: row, column count for a block of ndim 1 or ndim 2. - ''' + """ if array.ndim == 1: return array.shape[0], 1 - return array.shape #type: ignore + return array.shape # type: ignore def column_2d_filter(array: np.ndarray) -> np.ndarray: - '''Reshape a flat ndim 1 array into a 2D array with one columns and rows of length. This is used (a) for getting string representations and (b) for using np.concatenate and np binary operators on 1D arrays. - ''' + """Reshape a flat ndim 1 array into a 2D array with one columns and rows of length. This is used (a) for getting string representations and (b) for using np.concatenate and np binary operators on 1D arrays.""" # it is not clear when reshape is a copy or a view if array.ndim == 1: return np.reshape(array, (array.shape[0], 1)) @@ -76,9 +76,9 @@ def column_2d_filter(array: np.ndarray) -> np.ndarray: def column_1d_filter(array: np.ndarray) -> np.ndarray: - ''' + """ Ensure that a column that might be 2D or 1D is returned as a 1D array. - ''' + """ if array.ndim == 2: # could assert that array.shape[1] == 1, but this will raise if does not fit return np.reshape(array, array.shape[0]) @@ -86,21 +86,22 @@ def column_1d_filter(array: np.ndarray) -> np.ndarray: def row_1d_filter(array: np.ndarray) -> np.ndarray: - ''' + """ Ensure that a row that might be 2D or 1D is returned as a 1D array. - ''' + """ if array.ndim == 2: # could assert that array.shape[0] == 1, but this will raise if does not fit return np.reshape(array, array.shape[1]) return array -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + def resolve_dtype(dt1: np.dtype, dt2: np.dtype) -> np.dtype: - ''' + """ Given two dtypes, return a compatible dtype that can hold both contents without truncation. - ''' + """ # NOTE: this is not taking into account endianness; it is not clear if this is important # NOTE: np.dtype(object) == np.object_, so we can return np.object_ @@ -138,11 +139,16 @@ def resolve_dtype(dt1: np.dtype, dt2: np.dtype) -> np.dtype: dt2_is_bool = dt2.type is np.bool_ # if any one is a string or a bool, we have to go to object; we handle both cases being the same above; result_type gives a string in mixed cases - if (dt1_is_str or dt2_is_str - or dt1_is_bool or dt2_is_bool - or dt1_is_dt or dt2_is_dt - or dt1_is_tdelta or dt2_is_tdelta - ): + if ( + dt1_is_str + or dt2_is_str + or dt1_is_bool + or dt2_is_bool + or dt1_is_dt + or dt2_is_dt + or dt1_is_tdelta + or dt2_is_tdelta + ): return DTYPE_OBJECT # if not a string or an object, can use result type @@ -150,11 +156,11 @@ def resolve_dtype(dt1: np.dtype, dt2: np.dtype) -> np.dtype: def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype: - '''Given an iterable of one or more dtypes, do pairwise comparisons to determine compatible overall type. Once we get to object we can stop checking and return object. + """Given an iterable of one or more dtypes, do pairwise comparisons to determine compatible overall type. Once we get to object we can stop checking and return object. Args: dtypes: iterable of one or more dtypes. - ''' + """ dtypes = iter(dtypes) dt_resolve = next(dtypes) @@ -166,12 +172,12 @@ def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype: def array_deepcopy( - array: np.ndarray, - memo: tp.Optional[tp.Dict[int, tp.Any]], - ) -> np.ndarray: - ''' + array: np.ndarray, + memo: tp.Optional[tp.Dict[int, tp.Any]], +) -> np.ndarray: + """ Create a deepcopy of an array, handling memo lookup, insertion, and object arrays. - ''' + """ ident = id(array) if memo is not None and ident in memo: return memo[ident] @@ -190,22 +196,20 @@ def array_deepcopy( def isna_element(value: tp.Any) -> bool: - '''Return Boolean if value is an NA. This does not yet handle pd.NA - ''' + """Return Boolean if value is an NA. This does not yet handle pd.NA""" try: - return np.isnan(value) #type: ignore + return np.isnan(value) # type: ignore except TypeError: pass if isinstance(value, (np.datetime64, np.timedelta64)): - return np.isnat(value) #type: ignore + return np.isnat(value) # type: ignore return value is None def dtype_from_element(value: tp.Optional[tp.Hashable]) -> np.dtype: - '''Given an arbitrary hashable to be treated as an element, return the appropriate dtype. This was created to avoid using np.array(value).dtype, which for a Tuple does not return object. - ''' + """Given an arbitrary hashable to be treated as an element, return the appropriate dtype. This was created to avoid using np.array(value).dtype, which for a Tuple does not return object.""" if value is np.nan: # NOTE: this will not catch all NaN instances, but will catch any default NaNs in function signatures that reference the same NaN object found on the NP root namespace return DTYPE_FLOAT_DEFAULT @@ -214,15 +218,15 @@ def dtype_from_element(value: tp.Optional[tp.Hashable]) -> np.dtype: if isinstance(value, tuple): return DTYPE_OBJECT if hasattr(value, 'dtype'): - return value.dtype #type: ignore + return value.dtype # type: ignore # NOTE: calling array and getting dtype on np.nan is faster than combining isinstance, isnan calls return np.array(value).dtype def get_new_indexers_and_screen_ref( - indexers: np.ndarray, - positions: np.ndarray, - ) -> tp.Tuple[np.ndarray, np.ndarray]: + indexers: np.ndarray, + positions: np.ndarray, +) -> tp.Tuple[np.ndarray, np.ndarray]: positions = indexers.argsort() @@ -241,9 +245,9 @@ def get_new_indexers_and_screen_ref( def get_new_indexers_and_screen_ak( - indexers: np.ndarray, - positions: np.ndarray, - ) -> tp.Tuple[np.ndarray, np.ndarray]: + indexers: np.ndarray, + positions: np.ndarray, +) -> tp.Tuple[np.ndarray, np.ndarray]: from arraykit import get_new_indexers_and_screen as ak_routine if len(positions) > len(indexers): @@ -256,6 +260,7 @@ def split_after_count(string: str, delimiter: str, count: int): *left, right = string.split(delimiter, maxsplit=count) return ','.join(left), right + def count_iteration(iterable: tp.Iterable): count = 0 for i in iterable: @@ -263,16 +268,13 @@ def count_iteration(iterable: tp.Iterable): return count -def slice_to_ascending_slice( - key: slice, - size: int - ) -> slice: - ''' +def slice_to_ascending_slice(key: slice, size: int) -> slice: + """ Given a slice, return a slice that, with ascending integers, covers the same values. Args: size: the length of the container on this axis - ''' + """ key_step = key.step key_start = key.start key_stop = key.stop @@ -285,7 +287,7 @@ def slice_to_ascending_slice( # everything else should be descending, but we might have non-descending start, stop if key_start is not None and key_stop is not None: - if norm_key_start <= norm_key_stop: # an ascending range + if norm_key_start <= norm_key_stop: # an ascending range return EMPTY_SLICE norm_range = range(norm_key_start, norm_key_stop, norm_key_step) @@ -301,6 +303,3 @@ def slice_to_ascending_slice( return slice(None if key_stop is None else norm_range[-1], stop, 1) return slice(norm_range[-1], stop, key_step * -1) - - - diff --git a/pyproject.toml b/pyproject.toml index 21222e84..ffca200f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ authors = [ { name = "Brandt Bucher" }, { name = "Charles Burkland" }, ] -license = "MIT" +license = { text = "MIT" } keywords = ["numpy", "array"] dependencies = [ "numpy>=1.24.3", @@ -54,3 +54,25 @@ arraykit = ["__init__.pyi", "py.typed"] [tool.setuptools.dynamic] version = { file = "VERSION" } +[tool.ruff] +exclude = [ + ".git", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + "build", + "dist", +] + +line-length = 90 +indent-width = 4 + +[tool.ruff.format] +quote-style = "single" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = true +docstring-code-line-length = "dynamic" + + diff --git a/setup.py b/setup.py index d4cb9b53..46e91660 100644 --- a/setup.py +++ b/setup.py @@ -4,36 +4,43 @@ import site, os from pathlib import Path -AK_VERSION = Path("VERSION").read_text(encoding="utf-8").strip() +AK_VERSION = Path('VERSION').read_text(encoding='utf-8').strip() + def get_ext_dir(*components: tp.Iterable[str]) -> tp.Sequence[str]: dirs = [] + # Check user site-packages + user_site = site.getusersitepackages() + if user_site: + fp = os.path.join(user_site, *components) + if os.path.exists(fp): + dirs.append(fp) + # Check system site-packages for sp in site.getsitepackages(): fp = os.path.join(sp, *components) if os.path.exists(fp): dirs.append(fp) return dirs + ext_modules = [ Extension( - name="arraykit._arraykit", + name='arraykit._arraykit', sources=[ - "src/_arraykit.c", - "src/array_go.c", - "src/array_to_tuple.c", - "src/block_index.c", - "src/delimited_to_arrays.c", - "src/methods.c", - "src/tri_map.c", - "src/auto_map.c", + 'src/_arraykit.c', + 'src/array_go.c', + 'src/array_to_tuple.c', + 'src/block_index.c', + 'src/delimited_to_arrays.c', + 'src/methods.c', + 'src/tri_map.c', + 'src/auto_map.c', ], - include_dirs=get_ext_dir('numpy', '_core', 'include') + ['src'], library_dirs=get_ext_dir('numpy', '_core', 'lib'), - define_macros=[("AK_VERSION", AK_VERSION)], - libraries=["npymath"], + define_macros=[('AK_VERSION', AK_VERSION)], + libraries=['npymath'], ) ] setup(ext_modules=ext_modules) - diff --git a/src/__init__.py b/src/__init__.py index 1a0648c2..8ae2491a 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -24,6 +24,7 @@ from ._arraykit import iterable_str_to_array_1d as iterable_str_to_array_1d from ._arraykit import split_after_count as split_after_count from ._arraykit import get_new_indexers_and_screen as get_new_indexers_and_screen +from ._arraykit import write_array_to_file as write_array_to_file from ._arraykit import count_iteration as count_iteration from ._arraykit import first_true_1d as first_true_1d from ._arraykit import first_true_2d as first_true_2d diff --git a/src/__init__.pyi b/src/__init__.pyi index 1484f9c0..ed2241a2 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -8,22 +8,22 @@ _T = tp.TypeVar('_T') __version__: str _TLabel = tp.Union[ - tp.Hashable, - int, - bool, - np.bool_, - np.integer, - float, - complex, - np.inexact, - str, - bytes, - None, - np.datetime64, - np.timedelta64, - datetime.date, - datetime.datetime, - tp.Tuple['_TLabel', ...], + tp.Hashable, + int, + bool, + np.bool_, + np.integer, + float, + complex, + np.inexact, + str, + bytes, + None, + np.datetime64, + np.timedelta64, + datetime.date, + datetime.datetime, + tp.Tuple['_TLabel', ...], ] class ErrorInitTypeBlocks(RuntimeError): @@ -31,7 +31,6 @@ class ErrorInitTypeBlocks(RuntimeError): def with_traceback(self, tb: Exception) -> Exception: ... def __setstate__(self) -> None: ... - class NonUniqueError(RuntimeError): def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None: ... def with_traceback(self, tb: Exception) -> Exception: ... @@ -62,20 +61,18 @@ class TriMap: def dst_no_fill(self) -> bool: ... def map_src_no_fill(self, /, array_from: np.ndarray) -> np.ndarray: ... def map_dst_no_fill(self, /, array_from: np.ndarray) -> np.ndarray: ... - def map_src_fill(self, /, - array_from: np.ndarray, - fill_value: tp.Any, - fill_value_dtype: np.dtype - ) -> np.ndarray: ... - def map_dst_fill(self, /, - array_from: np.ndarray, - fill_value: tp.Any, - fill_value_dtype: np.dtype - ) -> np.ndarray: ... - def map_merge(self, /, - array_from_src: np.ndarray, - array_from_dst: np.ndarray, - ) -> np.ndarray: ... + def map_src_fill( + self, /, array_from: np.ndarray, fill_value: tp.Any, fill_value_dtype: np.dtype + ) -> np.ndarray: ... + def map_dst_fill( + self, /, array_from: np.ndarray, fill_value: tp.Any, fill_value_dtype: np.dtype + ) -> np.ndarray: ... + def map_merge( + self, + /, + array_from_src: np.ndarray, + array_from_dst: np.ndarray, + ) -> np.ndarray: ... class BlockIndex: shape: tp.Tuple[int, int] @@ -84,46 +81,68 @@ class BlockIndex: columns: int def __init__( - block_count: int = 0, - row_count: int = -1, - bir_count: int = 0, - bir_capacity: int = 8, - bir_bytes: bytes = b'', - dtype: np.dtype = None, - ) -> None: ... + block_count: int = 0, + row_count: int = -1, + bir_count: int = 0, + bir_capacity: int = 8, + bir_bytes: bytes = b'', + dtype: np.dtype = None, + ) -> None: ... def register(self, __value: np.ndarray) -> bool: ... - def to_list(self,) -> tp.List[tp.Tuple[int, int]]: ... - def to_bytes(self,) -> bytes: ... - def copy(self,) -> 'BlockIndex': ... - def __len__(self,) -> int: ... - def __iter__(self,) -> tp.Iterator[tp.Tuple[int, int]]: ... - def __reversed__(self,) -> tp.Iterator[tp.Tuple[int, int]]: ... + def to_list( + self, + ) -> tp.List[tp.Tuple[int, int]]: ... + def to_bytes( + self, + ) -> bytes: ... + def copy( + self, + ) -> 'BlockIndex': ... + def __len__( + self, + ) -> int: ... + def __iter__( + self, + ) -> tp.Iterator[tp.Tuple[int, int]]: ... + def __reversed__( + self, + ) -> tp.Iterator[tp.Tuple[int, int]]: ... def __getitem__(self, __key: int) -> tp.Tuple[int, int]: ... - def __getstate__(self,) -> tp.Tuple[int, int, int, int, bytes]: ... + def __getstate__( + self, + ) -> tp.Tuple[int, int, int, int, bytes]: ... def __setstate__(self, state: tp.Tuple[int, int, int, int, bytes]) -> None: ... def get_block(self, __key: int) -> int: ... def get_column(self, __key: int) -> int: ... - def iter_select(self, - __key: tp.Union[slice, np.ndarray, tp.List[int]], - ) -> tp.Iterator[tp.Tuple[int, int]]: ... - def iter_contiguous(self, - __key: tp.Union[slice, np.ndarray, tp.List[int]], - *, - ascending: bool = False, - reduce: bool = False, - ) -> tp.Iterator[tp.Tuple[int, tp.Union[slice, int]]]: ... + def iter_select( + self, + __key: tp.Union[slice, np.ndarray, tp.List[int]], + ) -> tp.Iterator[tp.Tuple[int, int]]: ... + def iter_contiguous( + self, + __key: tp.Union[slice, np.ndarray, tp.List[int]], + *, + ascending: bool = False, + reduce: bool = False, + ) -> tp.Iterator[tp.Tuple[int, tp.Union[slice, int]]]: ... def iter_block(self) -> tp.Iterator[tp.Tuple[int, slice]]: ... - class FrozenAutoMap: - def __init__(self, labels: tp.Iterable[_TLabel] | np.ndarray = (), /,) -> None: ... - def get(self, __key: _TLabel, /,) -> int: ... + def __init__( + self, + labels: tp.Iterable[_TLabel] | np.ndarray = (), + /, + ) -> None: ... + def get( + self, + __key: _TLabel, + /, + ) -> int: ... def keys(self) -> tp.Iterator[_TLabel]: ... def items(self) -> tp.Iterator[tuple[_TLabel, int]]: ... def values(self) -> tp.Iterator[int]: ... def get_all(self, __key: list[_TLabel] | np.ndarray) -> np.ndarray: ... def get_any(self, __key: list[_TLabel] | np.ndarray) -> list[int]: ... - def __iter__(self) -> tp.Iterator[_TLabel]: ... def __getitem__(self, __key: tp.Any) -> int: ... def __contains__(self, __key: tp.Any) -> bool: ... @@ -133,59 +152,54 @@ class FrozenAutoMap: def __getstate__(self) -> tp.Any: ... def __setstate__(self, __state: tp.Any) -> None: ... def __len__(self) -> int: ... - def __or__(self) -> tp.Any: ... def __ror__(self) -> tp.Any: ... - class AutoMap(FrozenAutoMap): - def __init__(self, labels: tp.Iterable[_TLabel] | np.ndarray = (), /,) -> None: ... + def __init__( + self, + labels: tp.Iterable[_TLabel] | np.ndarray = (), + /, + ) -> None: ... def __ior__(self) -> tp.Any: ... def add(self, __key: _TLabel) -> None: ... def update(self, __keys: tp.Iterable[_TLabel] | np.ndarray) -> None: ... - - - def iterable_str_to_array_1d( - iterable: tp.Iterable[str], - *, - dtype: tp.Optional[tp.Any] = None, - thousandschar: str = ',', - decimalchar: str = '.', - ) -> np.ndarray: ... - + iterable: tp.Iterable[str], + *, + dtype: tp.Optional[tp.Any] = None, + thousandschar: str = ',', + decimalchar: str = '.', +) -> np.ndarray: ... def delimited_to_arrays( - file_like: tp.Iterable[str], - *, - axis: int = 0, - dtypes: tp.Optional[tp.Callable[[int], tp.Any]] = None, - line_select: tp.Optional[tp.Callable[[int], bool]] = None, - delimiter: str = ',', - doublequote: bool = True, - escapechar: tp.Optional[str] = '', - quotechar: tp.Optional[str] = '"', - quoting: int = 0, - skipinitialspace: bool = False, - strict: bool = False, - thousandschar: str = ',', - decimalchar: str = '.', - ) -> tp.List[np.array]: ... - + file_like: tp.Iterable[str], + *, + axis: int = 0, + dtypes: tp.Optional[tp.Callable[[int], tp.Any]] = None, + line_select: tp.Optional[tp.Callable[[int], bool]] = None, + delimiter: str = ',', + doublequote: bool = True, + escapechar: tp.Optional[str] = '', + quotechar: tp.Optional[str] = '"', + quoting: int = 0, + skipinitialspace: bool = False, + strict: bool = False, + thousandschar: str = ',', + decimalchar: str = '.', +) -> tp.List[np.array]: ... def split_after_count( - string: str, - *, - delimiter: str = ',', - count: int = 0, - doublequote: bool = True, - escapechar: tp.Optional[str] = '', - quotechar: tp.Optional[str] = '"', - quoting: int = 0, - strict: bool = False, - ) -> tp.Tuple[str, str]: ... - + string: str, + *, + delimiter: str = ',', + count: int = 0, + doublequote: bool = True, + escapechar: tp.Optional[str] = '', + quotechar: tp.Optional[str] = '"', + quoting: int = 0, + strict: bool = False, +) -> tp.Tuple[str, str]: ... def count_iteration(__iterable: tp.Iterable) -> int: ... - def immutable_filter(__array: np.ndarray) -> np.ndarray: ... def mloc(__array: np.ndarray) -> int: ... def name_filter(__name: _TLabel) -> _TLabel: ... @@ -193,13 +207,23 @@ def shape_filter(__array: np.ndarray) -> np.ndarray: ... def column_2d_filter(__array: np.ndarray) -> np.ndarray: ... def column_1d_filter(__array: np.ndarray) -> np.ndarray: ... def row_1d_filter(__array: np.ndarray) -> np.ndarray: ... -def array_deepcopy(__array: np.ndarray, memo: tp.Optional[tp.Dict[int, tp.Any]]) -> np.ndarray: ... +def array_deepcopy( + __array: np.ndarray, memo: tp.Optional[tp.Dict[int, tp.Any]] +) -> np.ndarray: ... def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ... def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... def isna_element(__value: tp.Any, include_none: bool = True) -> bool: ... def dtype_from_element(__value: tp.Optional[_TLabel]) -> np.dtype: ... -def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) -> tp.Tuple[np.ndarray, np.ndarray]: ... - +def get_new_indexers_and_screen( + indexers: np.ndarray, positions: np.ndarray +) -> tp.Tuple[np.ndarray, np.ndarray]: ... +def write_array_to_file( + __array: np.ndarray, + __file: tp.IO[bytes], + *, + fortran_order: bool = False, + buffersize: int = 8192, +) -> None: ... def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ... def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ... def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ... @@ -209,4 +233,4 @@ def astype_array(__array: np.ndarray, __dtype: np.dtype | None, /) -> np.ndarray def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ... def slice_to_unit(__slice: slice, /) -> int: ... def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ... -def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ... \ No newline at end of file +def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ... diff --git a/src/_arraykit.c b/src/_arraykit.c index 7ce6eb20..db364883 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -65,6 +65,10 @@ static PyMethodDef arraykit_methods[] = { (PyCFunction)get_new_indexers_and_screen, METH_VARARGS | METH_KEYWORDS, NULL}, + {"write_array_to_file", + (PyCFunction)write_array_to_file, + METH_VARARGS | METH_KEYWORDS, + NULL}, {NULL}, }; diff --git a/src/methods.c b/src/methods.c index 79bcff3c..c9d41a72 100644 --- a/src/methods.c +++ b/src/methods.c @@ -7,6 +7,13 @@ # include "numpy/arrayobject.h" # include "numpy/arrayscalars.h" # include "numpy/halffloat.h" +# include + +# ifdef _WIN32 +# include +# else +# include +# endif # include "methods.h" # include "utilities.h" @@ -1101,6 +1108,231 @@ static char *array_deepcopy_kwarg_names[] = { NULL }; +static char *write_array_to_file_kwarg_names[] = { + "array", + "file", + "fortran_order", + "buffersize", + NULL +}; + +/* Helper function to write data directly to a file descriptor. + * Returns -1 on error (with Python exception set), or 0 on success. + */ +static int +write_to_fd(int fd, const char *data, Py_ssize_t size) +{ + Py_ssize_t total_written = 0; + while (total_written < size) { + Py_ssize_t to_write = size - total_written; +#ifdef _WIN32 + /* On Windows, _write has a max count of INT_MAX */ + if (to_write > INT_MAX) { + to_write = INT_MAX; + } + int written = _write(fd, data + total_written, (unsigned int)to_write); +#else + /* On POSIX, write may have platform-specific limits */ + ssize_t written = write(fd, data + total_written, to_write); +#endif + if (written < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + if (written == 0) { + /* No progress - treat as error */ + PyErr_SetString(PyExc_OSError, "write() returned 0"); + return -1; + } + total_written += written; + } + return 0; +} + +PyObject * +write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) +{ + PyObject *a; + PyObject *file; + int fortran_order = 0; + npy_intp buffersize = 8192; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "OO|pn:write_array_to_file", write_array_to_file_kwarg_names, + &a, + &file, + &fortran_order, + &buffersize)) { + return NULL; + } + AK_CHECK_NUMPY_ARRAY(a); + if (buffersize < 1) { + PyErr_SetString(PyExc_ValueError, "buffersize must be at least 1"); + return NULL; + } + + PyArrayObject *array = (PyArrayObject*)a; + npy_uint32 flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_BUFFERED | NPY_ITER_ZEROSIZE_OK; + NPY_ORDER order = (fortran_order && !PyArray_IS_C_CONTIGUOUS(array)) + ? NPY_FORTRANORDER + : NPY_CORDER; + PyArrayObject *operands[] = {array}; + npy_uint32 op_flags[] = {NPY_ITER_READONLY | NPY_ITER_ALIGNED}; + NpyIter *iter = NpyIter_AdvancedNew( + 1, + operands, + flags, + order, + NPY_NO_CASTING, + op_flags, + NULL, + -1, + NULL, + NULL, + buffersize + ); + if (iter == NULL) { + return NULL; + } + + NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + NpyIter_Deallocate(iter); + return NULL; + } + + char **dataptr = NpyIter_GetDataPtrArray(iter); + npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter); + npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); + Py_ssize_t itemsize = PyArray_ITEMSIZE(array); + if (itemsize == 0) { + NpyIter_Deallocate(iter); + Py_RETURN_NONE; + } + + // Try to get file descriptor for direct writes. fall back to calling Python's write() method. + int fd = PyObject_AsFileDescriptor(file); + int use_fd = (fd >= 0); + + /* If we are going to write directly to the file descriptor, we must first + * flush any data buffered at the Python level (e.g. an io.BufferedWriter + * from open(path, 'wb')). Writing to the raw fd bypasses that user-space + * buffer, so without a flush our bytes would land at the kernel offset, + * ahead of any still-buffered data, corrupting the file. Raw integer fds + * (e.g. from os.open) have no flush() method; that is not an error. */ + if (use_fd) { + PyObject *flush_result = PyObject_CallMethod(file, "flush", NULL); + if (flush_result == NULL) { + PyErr_Clear(); + } + else { + Py_DECREF(flush_result); + } + } + + // For non-fd path, we need the write method name + PyObject *write_name = NULL; + if (!use_fd) { + PyErr_Clear(); /* Clear the error from PyObject_AsFileDescriptor */ + write_name = PyUnicode_InternFromString("write"); + if (write_name == NULL) { + NpyIter_Deallocate(iter); + return NULL; + } + } + + // Allocate a buffer for packing non-contiguous data + char *pack_buffer = NULL; + Py_ssize_t pack_buffer_size = 0; + + do { + npy_intp inner_size = *innersizeptr; + if (inner_size == 0) { + continue; + } + if (inner_size > PY_SSIZE_T_MAX / itemsize) { + PyErr_SetString(PyExc_OverflowError, "array chunk too large"); + goto fail; + } + Py_ssize_t chunk_size = (Py_ssize_t)inner_size * itemsize; + + const char *data_to_write = *dataptr; + + // If stride is not contiguous, pack into buffer + if (*strideptr != itemsize) { + if (pack_buffer_size < chunk_size) { + char *new_buffer = (char *)PyMem_Realloc(pack_buffer, chunk_size); + if (new_buffer == NULL) { + PyErr_NoMemory(); + goto fail; + } + pack_buffer = new_buffer; + pack_buffer_size = chunk_size; + } + char *src = *dataptr; + npy_intp stride = *strideptr; + for (npy_intp i = 0; i < inner_size; ++i) { + memcpy(pack_buffer + (i * itemsize), src + (i * stride), itemsize); + } + data_to_write = pack_buffer; + } + + // Write the data + if (use_fd) { + if (write_to_fd(fd, data_to_write, chunk_size) < 0) { + goto fail; + } + } + else { + /* Note: PyMemoryView_FromMemory requires non-const char*, but we pass + * PyBUF_READ flag which makes the view read-only, so the cast is safe. */ + PyObject *buffer = PyMemoryView_FromMemory((char *)data_to_write, chunk_size, PyBUF_READ); + if (buffer == NULL) { + goto fail; + } + + PyObject *write_result = PyObject_CallMethodObjArgs(file, write_name, buffer, NULL); + Py_DECREF(buffer); + if (write_result == NULL) { + goto fail; + } + + /* Check for partial writes */ + if (PyLong_Check(write_result)) { + Py_ssize_t bytes_written = PyLong_AsSsize_t(write_result); + if (bytes_written < 0 && PyErr_Occurred()) { + Py_DECREF(write_result); + goto fail; + } + if (bytes_written != chunk_size) { + Py_DECREF(write_result); + PyErr_Format(PyExc_OSError, + "write() returned partial write (%zd of %zd bytes)", + bytes_written, chunk_size); + goto fail; + } + } + Py_DECREF(write_result); + } + } while(iternext(iter)); + + /* NpyIter can return 0 for TWO reasons: end of iteration OR error. + * We MUST check PyErr_Occurred() to distinguish between the two. */ + if (PyErr_Occurred()) { + goto fail; + } + + PyMem_Free(pack_buffer); + NpyIter_Deallocate(iter); + Py_XDECREF(write_name); + Py_RETURN_NONE; + + fail: + PyMem_Free(pack_buffer); + NpyIter_Deallocate(iter); + Py_XDECREF(write_name); + return NULL; +} + PyObject * array_deepcopy(PyObject *m, PyObject *args, PyObject *kwargs) { diff --git a/src/methods.h b/src/methods.h index 5d2a80c4..d81040e2 100644 --- a/src/methods.h +++ b/src/methods.h @@ -75,6 +75,9 @@ isna_element(PyObject *m, PyObject *args, PyObject *kwargs); PyObject * get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs); +PyObject * +write_array_to_file(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs); + // Specialized array deepcopy that stores immutable arrays in an optional memo dict that can be provided with kwargs. PyObject * array_deepcopy(PyObject *m, PyObject *args, PyObject *kwargs); diff --git a/test/test_array_go.py b/test/test_array_go.py index 6741e4d3..35fae2b3 100644 --- a/test/test_array_go.py +++ b/test/test_array_go.py @@ -1,4 +1,3 @@ - import unittest import copy import pickle @@ -8,9 +7,8 @@ from arraykit import ArrayGO from arraykit import mloc -class TestUnit(unittest.TestCase): - +class TestUnit(unittest.TestCase): def test_array_init_a(self) -> None: with self.assertRaises(NotImplementedError): _ = ArrayGO(np.array((3, 4, 5))) @@ -19,43 +17,31 @@ def test_array_append_a(self) -> None: ag1 = ArrayGO(('a', 'b', 'c', 'd')) - self.assertEqual([x for x in ag1], - ['a', 'b', 'c', 'd']) - - self.assertEqual(ag1.values.tolist(), - ['a', 'b', 'c', 'd']) + self.assertEqual([x for x in ag1], ['a', 'b', 'c', 'd']) + self.assertEqual(ag1.values.tolist(), ['a', 'b', 'c', 'd']) ag1.append('e') ag1.extend(('f', 'g')) - self.assertEqual(ag1.values.tolist(), - ['a', 'b', 'c', 'd', 'e', 'f', 'g']) - - self.assertEqual([x for x in ag1], - ['a', 'b', 'c', 'd', 'e', 'f', 'g']) + self.assertEqual(ag1.values.tolist(), ['a', 'b', 'c', 'd', 'e', 'f', 'g']) + self.assertEqual([x for x in ag1], ['a', 'b', 'c', 'd', 'e', 'f', 'g']) def test_array_append_b(self) -> None: ag1 = ArrayGO(np.array(('a', 'b', 'c', 'd'), object)) - self.assertEqual([x for x in ag1], - ['a', 'b', 'c', 'd']) - - self.assertEqual(ag1.values.tolist(), - ['a', 'b', 'c', 'd']) + self.assertEqual([x for x in ag1], ['a', 'b', 'c', 'd']) + self.assertEqual(ag1.values.tolist(), ['a', 'b', 'c', 'd']) ag1.append('e') ag1.extend(('f', 'g')) - self.assertEqual(ag1.values.tolist(), - ['a', 'b', 'c', 'd', 'e', 'f', 'g']) - - self.assertEqual([x for x in ag1], - ['a', 'b', 'c', 'd', 'e', 'f', 'g']) + self.assertEqual(ag1.values.tolist(), ['a', 'b', 'c', 'd', 'e', 'f', 'g']) + self.assertEqual([x for x in ag1], ['a', 'b', 'c', 'd', 'e', 'f', 'g']) def test_array_getitem_a(self) -> None: @@ -71,8 +57,7 @@ def test_array_getitem_a(self) -> None: post = ag1[ag1.values == 'b'] self.assertEqual(post.tolist(), ['b', 'b']) - self.assertEqual(ag1[[2,1,1,1]].tolist(), - ['c', 'b', 'b', 'b']) + self.assertEqual(ag1[[2, 1, 1, 1]].tolist(), ['c', 'b', 'b', 'b']) def test_array_copy_a(self) -> None: @@ -82,18 +67,16 @@ def test_array_copy_a(self) -> None: ag2 = ag1.copy() ag1.extend(('f', 'g')) - self.assertEqual(ag1.values.tolist(), - ['a', 'b', 'c', 'd', 'e', 'f', 'g']) + self.assertEqual(ag1.values.tolist(), ['a', 'b', 'c', 'd', 'e', 'f', 'g']) - self.assertEqual(ag2.values.tolist(), - ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(ag2.values.tolist(), ['a', 'b', 'c', 'd', 'e']) def test_array_deepcopy_a(self) -> None: ag1 = ArrayGO(np.array(('a', 'b', 'c', 'd'), dtype=object)) ag1.append('e') ag1.extend(('f', 'g')) ag2 = copy.deepcopy(ag1) - self.assertEqual(ag1.values.tolist(), ag2.values.tolist()) #type: ignore + self.assertEqual(ag1.values.tolist(), ag2.values.tolist()) # type: ignore def test_array_len_a(self) -> None: @@ -105,9 +88,9 @@ def test_array_len_a(self) -> None: def test_array_getnewargs_a(self) -> None: ag1 = ArrayGO(np.array(('a', 'b', 'c', 'd'), object)) self.assertEqual( - ag1.__getnewargs__()[0].tolist(), - ag1.values.tolist(), - ) + ag1.__getnewargs__()[0].tolist(), + ag1.values.tolist(), + ) def test_array_pickle_a(self) -> None: ag1 = ArrayGO(np.array(('a', 'b', 'c', 'd'), object)) @@ -116,10 +99,5 @@ def test_array_pickle_a(self) -> None: self.assertEqual(ag1.values.tolist(), ag2.values.tolist()) - if __name__ == '__main__': unittest.main() - - - - diff --git a/test/test_astype_array.py b/test/test_astype_array.py index acfd32fb..b462b345 100644 --- a/test/test_astype_array.py +++ b/test/test_astype_array.py @@ -4,8 +4,8 @@ from arraykit import astype_array -class TestUnit(unittest.TestCase): +class TestUnit(unittest.TestCase): def test_astype_array_a1(self) -> None: a1 = np.array([10, 20, 30], dtype=np.int64) a1.flags.writeable = False @@ -13,7 +13,6 @@ def test_astype_array_a1(self) -> None: a2 = astype_array(a1, np.int64) self.assertEqual(id(a1), id(a2)) - def test_astype_array_a2(self) -> None: a1 = np.array([10, 20, 30], dtype=np.int64) a1.flags.writeable = False @@ -22,7 +21,6 @@ def test_astype_array_a2(self) -> None: self.assertNotEqual(id(a1), id(a2)) self.assertEqual(a2.dtype, np.dtype(np.float64)) - def test_astype_array_a3(self) -> None: a1 = np.array([False, True, False]) @@ -38,7 +36,6 @@ def test_astype_array_b1(self) -> None: self.assertTrue(a2.flags.writeable) self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('2024')]) - def test_astype_array_b2(self) -> None: a1 = np.array(['2021', '1642'], dtype=np.datetime64) @@ -47,34 +44,43 @@ def test_astype_array_b2(self) -> None: self.assertTrue(a2.flags.writeable) self.assertEqual(list(a2), [np.datetime64('2021'), np.datetime64('1642')]) - def test_astype_array_b3(self) -> None: - a1 = np.array(['2021', '2024', '1984', '1642'], dtype=np.datetime64).reshape((2, 2)) + a1 = np.array(['2021', '2024', '1984', '1642'], dtype=np.datetime64).reshape( + (2, 2) + ) a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) self.assertTrue(a2.flags.writeable) self.assertEqual( - list(list(a) for a in a2), - [[np.datetime64('2021'), np.datetime64('2024')], [np.datetime64('1984'), np.datetime64('1642')]]) + list(list(a) for a in a2), + [ + [np.datetime64('2021'), np.datetime64('2024')], + [np.datetime64('1984'), np.datetime64('1642')], + ], + ) def test_astype_array_b4(self) -> None: - a1 = np.array(['2021', '2024', '1532', '1984', '1642', '899'], dtype=np.datetime64).reshape((2, 3)) + a1 = np.array( + ['2021', '2024', '1532', '1984', '1642', '899'], dtype=np.datetime64 + ).reshape((2, 3)) a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) self.assertEqual(a2.shape, (2, 3)) self.assertTrue(a2.flags.writeable) self.assertEqual( - list(list(a) for a in a2), - [[np.datetime64('2021'), np.datetime64('2024'), np.datetime64('1532')], - [np.datetime64('1984'), np.datetime64('1642'), np.datetime64('899')]]) + list(list(a) for a in a2), + [ + [np.datetime64('2021'), np.datetime64('2024'), np.datetime64('1532')], + [np.datetime64('1984'), np.datetime64('1642'), np.datetime64('899')], + ], + ) def test_astype_array_c(self) -> None: with self.assertRaises(TypeError): _ = astype_array([3, 4, 5], np.int64) - def test_astype_array_d1(self) -> None: a1 = np.array([10, 20, 30], dtype=np.int64) a2 = astype_array(a1) @@ -83,7 +89,6 @@ def test_astype_array_d1(self) -> None: self.assertEqual(a2.shape, (3,)) self.assertTrue(a2.flags.writeable) - def test_astype_array_d2(self) -> None: a1 = np.array([10, 20, 30], dtype=np.int64) a2 = astype_array(a1, None) @@ -92,8 +97,6 @@ def test_astype_array_d2(self) -> None: self.assertEqual(a2.shape, (3,)) self.assertTrue(a2.flags.writeable) - - def test_astype_array_d3(self) -> None: a1 = np.array([10, 20, 30], dtype=np.int64) a2 = astype_array(a1, np.int64) @@ -105,19 +108,26 @@ def test_astype_array_d3(self) -> None: self.assertNotEqual(id(a1), id(a2)) def test_astype_array_e(self) -> None: - a1 = np.array(['2021', '2024', '1997', '1984', '2000', '1999'], dtype='datetime64[ns]').reshape((2, 3)) + a1 = np.array( + ['2021', '2024', '1997', '1984', '2000', '1999'], dtype='datetime64[ns]' + ).reshape((2, 3)) a2 = astype_array(a1, np.object_) self.assertEqual(a2.dtype, np.dtype(np.object_)) self.assertEqual(a2.shape, (2, 3)) self.assertTrue(a2.flags.writeable) self.assertEqual( - list(list(a) for a in a2), - [[np.datetime64('2021-01-01T00:00:00.000000000'), - np.datetime64('2024-01-01T00:00:00.000000000'), - np.datetime64('1997-01-01T00:00:00.000000000')], - [np.datetime64('1984-01-01T00:00:00.000000000'), - np.datetime64('2000-01-01T00:00:00.000000000'), - np.datetime64('1999-01-01T00:00:00.000000000')]] - ) - + list(list(a) for a in a2), + [ + [ + np.datetime64('2021-01-01T00:00:00.000000000'), + np.datetime64('2024-01-01T00:00:00.000000000'), + np.datetime64('1997-01-01T00:00:00.000000000'), + ], + [ + np.datetime64('1984-01-01T00:00:00.000000000'), + np.datetime64('2000-01-01T00:00:00.000000000'), + np.datetime64('1999-01-01T00:00:00.000000000'), + ], + ], + ) diff --git a/test/test_auto_map.py b/test/test_auto_map.py index 0154cece..a674bd92 100644 --- a/test/test_auto_map.py +++ b/test/test_auto_map.py @@ -11,14 +11,14 @@ def test_am_extend(): - am1 = AutoMap(("a", "b")) - am2 = am1 | AutoMap(("c", "d")) - assert list(am2.keys()) == ["a", "b", "c", "d"] + am1 = AutoMap(('a', 'b')) + am2 = am1 | AutoMap(('c', 'd')) + assert list(am2.keys()) == ['a', 'b', 'c', 'd'] def test_am_add(): a = AutoMap() - for l, key in enumerate(["a", "b", "c", "d"]): + for l, key in enumerate(['a', 'b', 'c', 'd']): assert a.add(key) is None assert len(a) == l + 1 assert a[key] == l @@ -26,7 +26,7 @@ def test_am_add(): def test_fam_contains(): x = [] - fam = FrozenAutoMap(("a", "b", "c")) + fam = FrozenAutoMap(('a', 'b', 'c')) assert (x in fam.values()) == False # NOTE: exercise x to force seg fault assert len(x) == 0 @@ -115,32 +115,32 @@ def test_fam_constructor_array_float_a(): def test_fam_constructor_array_dt64_a(): - a1 = np.array(("1970-01", "2023-05"), dtype=np.datetime64) + a1 = np.array(('1970-01', '2023-05'), dtype=np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam[np.datetime64("2023-05")] == 1 - assert fam[np.datetime64("1970-01")] == 0 + assert fam[np.datetime64('2023-05')] == 1 + assert fam[np.datetime64('1970-01')] == 0 with pytest.raises(KeyError): - fam[np.datetime64("nat")] + fam[np.datetime64('nat')] with pytest.raises(KeyError): - fam[np.datetime64("1970")] + fam[np.datetime64('1970')] def test_fam_constructor_array_dt64_b(): - a1 = np.array(("1542", "nat"), dtype=np.datetime64) + a1 = np.array(('1542', 'nat'), dtype=np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam[np.datetime64("nat")] == 1 - assert fam[np.datetime64("nat", "D")] == 1 - assert fam[np.datetime64("nat", "ns")] == 1 - assert fam[np.datetime64("1542")] == 0 + assert fam[np.datetime64('nat')] == 1 + assert fam[np.datetime64('nat', 'D')] == 1 + assert fam[np.datetime64('nat', 'ns')] == 1 + assert fam[np.datetime64('1542')] == 0 def test_fam_constructor_array_dt64_c(): - a1 = np.array(("nat", "nat"), dtype=np.datetime64) + a1 = np.array(('nat', 'nat'), dtype=np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) # when we get "generic" dt64 units, we load scalars in a list, and can thus support multiple NaNs @@ -148,7 +148,7 @@ def test_fam_constructor_array_dt64_c(): def test_fam_constructor_array_dt64_d(): - a1 = np.array(("2023-05", "2023-05"), dtype=np.datetime64) + a1 = np.array(('2023-05', '2023-05'), dtype=np.datetime64) a1.flags.writeable = False with pytest.raises(NonUniqueError): fam = FrozenAutoMap(a1) @@ -158,14 +158,14 @@ def test_fam_constructor_array_dt64_d(): def test_fam_constructor_array_unicode_a(): - a1 = np.array(("a", "b", "a")) + a1 = np.array(('a', 'b', 'a')) a1.flags.writeable = False with pytest.raises(NonUniqueError): fam = FrozenAutoMap(a1) def test_fam_constructor_array_unicode_b(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) for k in a1: @@ -173,7 +173,7 @@ def test_fam_constructor_array_unicode_b(): def test_fam_constructor_array_unicode_c(): - a1 = np.array(("z0Ct", "z0DS", "z0E9")) + a1 = np.array(('z0Ct', 'z0DS', 'z0E9')) a1.flags.writeable = False fam = FrozenAutoMap(a1) @@ -191,82 +191,82 @@ def test_fam_constructor_array_unicode_c(): def test_fam_constructor_array_unicode_d1(): - a1 = np.array(["", "\x000"], dtype="U2") + a1 = np.array(['', '\x000'], dtype='U2') a1.flags.writeable = False fam = FrozenAutoMap(a1) assert len(fam) == 2 - assert list(fam) == ["", "\x000"] - assert "" in fam - assert "\x000" in fam + assert list(fam) == ['', '\x000'] + assert '' in fam + assert '\x000' in fam def test_fam_constructor_array_unicode_d2(): - a1 = np.array(["", "\x000\x00"], dtype="U3") + a1 = np.array(['', '\x000\x00'], dtype='U3') a1.flags.writeable = False fam = FrozenAutoMap(a1) assert len(fam) == 2 - assert list(fam) == ["", "\x000"] # we lost the last null - assert "" in fam - assert "\x000" in fam + assert list(fam) == ['', '\x000'] # we lost the last null + assert '' in fam + assert '\x000' in fam def test_fam_copy_array_unicode_a(): - a1 = np.array(("a", "ccc", "bb")) + a1 = np.array(('a', 'ccc', 'bb')) a1.flags.writeable = False fam1 = FrozenAutoMap(a1) fam2 = FrozenAutoMap(fam1) - assert fam2["a"] == 0 - assert fam2["ccc"] == 1 - assert fam2["bb"] == 2 + assert fam2['a'] == 0 + assert fam2['ccc'] == 1 + assert fam2['bb'] == 2 # ------------------------------------------------------------------------------ def test_fam_constructor_array_bytes_a(): - a1 = np.array((b"a", b"b", b"c")) + a1 = np.array((b'a', b'b', b'c')) with pytest.raises(TypeError): fam = FrozenAutoMap(a1) def test_fam_constructor_array_bytes_b(): - a1 = np.array((b"aaa", b"b", b"aaa")) + a1 = np.array((b'aaa', b'b', b'aaa')) a1.flags.writeable = False with pytest.raises(NonUniqueError): fam = FrozenAutoMap(a1) def test_fam_constructor_array_bytes_c(): - a1 = np.array((b"aaa", b"b", b"cc")) + a1 = np.array((b'aaa', b'b', b'cc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam[b"aaa"] == 0 - assert fam[b"b"] == 1 - assert fam[b"cc"] == 2 + assert fam[b'aaa'] == 0 + assert fam[b'b'] == 1 + assert fam[b'cc'] == 2 def test_fam_copy_array_bytes_a(): - a1 = np.array((b"a", b"ccc", b"bb")) + a1 = np.array((b'a', b'ccc', b'bb')) a1.flags.writeable = False fam1 = FrozenAutoMap(a1) fam2 = FrozenAutoMap(fam1) - assert fam2[b"a"] == 0 - assert fam2[b"ccc"] == 1 - assert fam2[b"bb"] == 2 + assert fam2[b'a'] == 0 + assert fam2[b'ccc'] == 1 + assert fam2[b'bb'] == 2 # ------------------------------------------------------------------------------ def test_fam_array_bytes_get_a(): - a1 = np.array((b"", b" ", b" ", b" ")) + a1 = np.array((b'', b' ', b' ', b' ')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get(b"") == 0 - assert fam.get(b" ") == None - assert fam.get(b" ") == 2 - assert fam.get(b" ") == 3 + assert fam.get(b'') == 0 + assert fam.get(b' ') == None + assert fam.get(b' ') == 2 + assert fam.get(b' ') == 3 # ------------------------------------------------------------------------------ @@ -297,7 +297,7 @@ def test_fam_array_int_get_a(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 0 assert fam.get(True) == 0 assert fam.get(a1[2]) == 2 @@ -309,7 +309,7 @@ def test_fam_array_int_get_b(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 0 assert fam.get(True) == 0 assert fam.get(a1[2]) == 2 @@ -322,7 +322,7 @@ def test_fam_array_int_get_c1(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 0 assert fam.get(True) == 0 assert fam.get(a1[2]) == 2 @@ -350,7 +350,7 @@ def test_fam_array_int_get_d(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 0 assert fam.get(True) == 0 assert fam.get(a1[2]) == 2 @@ -363,7 +363,7 @@ def test_fam_array_int_get_e(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(2147483648) == 0 assert fam.get(a1[0]) == 0 @@ -405,7 +405,7 @@ def test_fam_array_int_get_d(): ): a2 = a1.astype(ctype) for k in a2: - assert k in fam, f"{type(k)}" + assert k in fam, f'{type(k)}' assert 2.0 in fam assert 2.1 not in fam assert True in fam @@ -429,7 +429,7 @@ def test_fam_array_uint_get_a(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 0 assert fam.get(True) == 0 assert fam.get(a1[2]) == 2 @@ -444,7 +444,7 @@ def test_fam_array_uint_get_b(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 1 assert fam.get(True) == 1 assert fam.get(a1[2]) == 2 @@ -459,7 +459,7 @@ def test_fam_array_uint_get_c(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 1 assert fam.get(True) == 1 assert fam.get(a1[2]) == 2 @@ -474,7 +474,7 @@ def test_fam_array_uint_get_d(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1) == 1 assert fam.get(True) == 1 assert fam.get(a1[2]) == 2 @@ -512,10 +512,10 @@ def test_fam_array_uint_get_f(): ): a2 = a1.astype(ctype) for k in a2: - assert k in fam, f"{type(k)}" + assert k in fam, f'{type(k)}' a3 = -a2 for k in a3: - assert k not in fam, f"{type(k)}" + assert k not in fam, f'{type(k)}' assert True in fam assert 4.0 in fam @@ -532,7 +532,7 @@ def test_fam_array_float_get_a(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(1.5) == 0 assert fam.get(10.2) == 1 assert fam.get(a1[1]) == 1 @@ -544,7 +544,7 @@ def test_fam_array_float_get_b(): a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None # assert fam.get(1.5) == 0 assert fam.get(a1[0]) == 0 assert fam.get(a1[1]) == 1 @@ -555,7 +555,7 @@ def test_fam_array_float_get_c1(): a1 = np.array((1.5, 10.2, 8.8), dtype=np.float16) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("f") is None + assert fam.get('f') is None assert fam.get(a1[0]) == 0 assert fam.get(a1[1]) == 1 assert fam.get(a1[2]) == 2 @@ -586,10 +586,10 @@ def test_fam_array_float_get_d(): ): a2 = a1.astype(ctype) for k in a2: - assert k in fam, f"{type(k)}" + assert k in fam, f'{type(k)}' a3 = -a2 for k in a3: - assert k not in fam, f"{type(k)}" + assert k not in fam, f'{type(k)}' assert True in fam assert 4.0 in fam @@ -602,28 +602,28 @@ def test_fam_array_float_get_d(): def test_fam_array_unicode_get_a(): - a1 = np.array(("bb", "a", "ccc")) + a1 = np.array(('bb', 'a', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("a") == 1 - assert fam.get("bb") == 0 - assert fam.get("ccc") == 2 + assert fam.get('a') == 1 + assert fam.get('bb') == 0 + assert fam.get('ccc') == 2 assert fam.get(None) is None assert fam.get(3.2) is None - assert fam.get("cc") is None - assert fam.get("cccc") is None + assert fam.get('cc') is None + assert fam.get('cccc') is None def test_fam_array_unicode_get_b(): - a1 = np.array(("", " ", " ", " ")) + a1 = np.array(('', ' ', ' ', ' ')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert fam.get("") == 0 - assert fam.get(" ") == None - assert fam.get(" ") == 2 - assert fam.get(" ") == 3 + assert fam.get('') == 0 + assert fam.get(' ') == None + assert fam.get(' ') == 2 + assert fam.get(' ') == 3 # ------------------------------------------------------------------------------ @@ -659,50 +659,50 @@ def test_fam_array_items_a(): def test_fam_array_values_b(): - a1 = np.array(("a", "b", "c", "d")) + a1 = np.array(('a', 'b', 'c', 'd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) assert list(fam.values()) == [0, 1, 2, 3] def test_fam_array_keys_b(): - a1 = np.array(("a", "b", "c", "d")) + a1 = np.array(('a', 'b', 'c', 'd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert list(fam.keys()) == ["a", "b", "c", "d"] + assert list(fam.keys()) == ['a', 'b', 'c', 'd'] def test_fam_array_items_b(): - a1 = np.array(("a", "b", "c", "d")) + a1 = np.array(('a', 'b', 'c', 'd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - assert list(fam.items()) == [("a", 0), ("b", 1), ("c", 2), ("d", 3)] + assert list(fam.items()) == [('a', 0), ('b', 1), ('c', 2), ('d', 3)] def test_fam_array_items_c(): - a1 = np.array(("a", "b", "c")) + a1 = np.array(('a', 'b', 'c')) a1.flags.writeable = False fam1 = FrozenAutoMap(a1) fam2 = FrozenAutoMap(fam1) - assert list(fam2.items()) == [("a", 0), ("b", 1), ("c", 2)] - assert list(fam1.items()) == [("a", 0), ("b", 1), ("c", 2)] + assert list(fam2.items()) == [('a', 0), ('b', 1), ('c', 2)] + assert list(fam1.items()) == [('a', 0), ('b', 1), ('c', 2)] # ------------------------------------------------------------------------------ def test_am_array_constructor_a(): - a1 = np.array(("a", "b", "c")) + a1 = np.array(('a', 'b', 'c')) a1.flags.writeable = False am1 = AutoMap(a1) def test_am_array_constructor_b(): - a1 = np.array(("2022-01", "2023-05"), dtype=np.datetime64) + a1 = np.array(('2022-01', '2023-05'), dtype=np.datetime64) a1.flags.writeable = False am1 = AutoMap(a1) - assert am1[np.datetime64("2023-05")] == 1 + assert am1[np.datetime64('2023-05')] == 1 def test_am_array_constructor_c(): @@ -718,7 +718,7 @@ def test_am_array_constructor_c(): def test_fam_array_pickle_a(): - a1 = np.array(("a", "b", "c", "d")) + a1 = np.array(('a', 'b', 'c', 'd')) a1.flags.writeable = False fam1 = FrozenAutoMap(a1) fam2 = pickle.loads(pickle.dumps(fam1)) @@ -737,7 +737,7 @@ def test_fam_array_get_all_a(): fam.get_all((3, 3)) with pytest.raises(TypeError): - fam.get_all("a") + fam.get_all('a') with pytest.raises(TypeError): fam.get_all(None) @@ -759,28 +759,28 @@ def test_fam_array_get_all_b(): def test_fam_array_get_all_c(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - fam.get_all(["bb", "c"]) + fam.get_all(['bb', 'c']) def test_fam_array_get_all_d1(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post1 = fam.get_all(np.array(("bb", "a", "ccc", "a", "bb"))) + post1 = fam.get_all(np.array(('bb', 'a', 'ccc', 'a', 'bb'))) assert post1.tolist() == [1, 0, 2, 0, 1] assert post1.flags.writeable == False def test_fam_array_get_all_d2(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - fam.get_all(np.array(("bb", "a", "ccc", "aa"))) + fam.get_all(np.array(('bb', 'a', 'ccc', 'aa'))) def test_fam_array_get_all_e(): @@ -792,100 +792,100 @@ def test_fam_array_get_all_e(): def test_fam_array_get_all_f1(): - a1 = np.array(("a", "bb", "ccc", "dd")) + a1 = np.array(('a', 'bb', 'ccc', 'dd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post = fam.get_all(np.array(["ccc", "dd", "bb", "bb"])) + post = fam.get_all(np.array(['ccc', 'dd', 'bb', 'bb'])) assert post.tolist() == [2, 3, 1, 1] def test_fam_array_get_all_f2(): - a1 = np.array(("a", "bb", "ccc", "dd")) + a1 = np.array(('a', 'bb', 'ccc', 'dd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - fam.get_all(np.array(["bb", "c"])) + fam.get_all(np.array(['bb', 'c'])) def test_fam_array_get_all_g1(): - a1 = np.array((b"a", b"bb", b"ccc", b"dd")) + a1 = np.array((b'a', b'bb', b'ccc', b'dd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post = fam.get_all(np.array([b"ccc", b"dd", b"bb", b"bb"])) + post = fam.get_all(np.array([b'ccc', b'dd', b'bb', b'bb'])) assert post.tolist() == [2, 3, 1, 1] def test_fam_array_get_all_g2(): - a1 = np.array((b"a", b"bb", b"ccc", b"dd")) + a1 = np.array((b'a', b'bb', b'ccc', b'dd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - fam.get_all(np.array([b"dd", b"x"])) + fam.get_all(np.array([b'dd', b'x'])) def test_fam_array_get_all_h(): - a1 = np.array((b"a", b"")) + a1 = np.array((b'a', b'')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post = fam.get_all(np.array([b"", b"", b"a"])) + post = fam.get_all(np.array([b'', b'', b'a'])) assert post.tolist() == [1, 1, 0] def test_fam_array_get_all_i(): - a1 = np.array((b"foo", b"bar")) + a1 = np.array((b'foo', b'bar')) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - _ = fam.get_all(np.array([b"fo", b"ba"])) + _ = fam.get_all(np.array([b'fo', b'ba'])) with pytest.raises(KeyError): - _ = fam.get_all(np.array([b"", b""])) + _ = fam.get_all(np.array([b'', b''])) def test_fam_array_get_all_j(): - a1 = np.array(("aaaaa", "bb", "ccc", "dd")) + a1 = np.array(('aaaaa', 'bb', 'ccc', 'dd')) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - _ = fam.get_all(np.array(["a", "b"])) + _ = fam.get_all(np.array(['a', 'b'])) - assert fam.get_all(np.array(("bb", "dd", "bb", "dd"))).tolist() == [1, 3, 1, 3] + assert fam.get_all(np.array(('bb', 'dd', 'bb', 'dd'))).tolist() == [1, 3, 1, 3] def test_fam_array_get_all_k1(): - a1 = np.array(("2023-01-05", "1854-05-02"), np.datetime64) + a1 = np.array(('2023-01-05', '1854-05-02'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) post = fam.get_all( - np.array(["1854-05-02", "2023-01-05", "2023-01-05"], np.datetime64) + np.array(['1854-05-02', '2023-01-05', '2023-01-05'], np.datetime64) ) assert post.tolist() == [1, 0, 0] def test_fam_array_get_all_k2(): - a1 = np.array(("2023-01-05", "1854-05-02"), np.datetime64) + a1 = np.array(('2023-01-05', '1854-05-02'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): post = fam.get_all( - np.array(["1854-05-02", "2023-01-05", "2020-01-05"], np.datetime64) + np.array(['1854-05-02', '2023-01-05', '2020-01-05'], np.datetime64) ) def test_fam_array_get_all_l(): - a1 = np.array(("2023-01-05", "1854-05-02", "1988-01-01"), np.datetime64) + a1 = np.array(('2023-01-05', '1854-05-02', '1988-01-01'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) with pytest.raises(KeyError): - _ = fam.get_all(np.array(["2022-01", "2023-01", "1988-01"], np.datetime64)) + _ = fam.get_all(np.array(['2022-01', '2023-01', '1988-01'], np.datetime64)) def test_fam_array_get_all_m1(): @@ -920,32 +920,32 @@ def test_fam_array_get_all_m3(): def test_fam_array_get_any_a1(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post1 = fam.get_any(["bbb", "ccc", "a", "bbb"]) + post1 = fam.get_any(['bbb', 'ccc', 'a', 'bbb']) assert post1 == [2, 0] - post2 = fam.get_any(["bbb", "bbb"]) + post2 = fam.get_any(['bbb', 'bbb']) assert post2 == [] def test_fam_array_get_any_a2(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post1 = fam.get_any(np.array(("bbb", "a", "ccc", "aa", "bbb"))) + post1 = fam.get_any(np.array(('bbb', 'a', 'ccc', 'aa', 'bbb'))) assert post1 == [0, 2] def test_fam_array_get_any_a3(): - a1 = np.array(("a", "bb", "ccc")) + a1 = np.array(('a', 'bb', 'ccc')) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post1 = fam.get_any(np.array(["bbb", "ccc", "a", "bbb"])) + post1 = fam.get_any(np.array(['bbb', 'ccc', 'a', 'bbb'])) assert post1 == [2, 0] - post2 = fam.get_any(np.array(["bbb", "bbb"])) + post2 = fam.get_any(np.array(['bbb', 'bbb'])) assert post2 == [] @@ -962,74 +962,72 @@ def test_fam_array_get_any_b(): def test_fam_array_get_any_c1(): - a1 = np.array(("2023-01-05", "1854-05-02"), np.datetime64) + a1 = np.array(('2023-01-05', '1854-05-02'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) post = fam.get_any( - np.array( - ["1854-05-02", "nat", "1854-05-02", "2023-01-05", "nat"], np.datetime64 - ) + np.array(['1854-05-02', 'nat', '1854-05-02', '2023-01-05', 'nat'], np.datetime64) ) assert post == [1, 1, 0] def test_fam_array_get_any_c2(): - a1 = np.array(("2023-01-05", "1854-05-02"), np.datetime64) + a1 = np.array(('2023-01-05', '1854-05-02'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) post = fam.get_any( - np.array(["1854-05-02", "2023-01-05", "2020-01-05"], np.datetime64) + np.array(['1854-05-02', '2023-01-05', '2020-01-05'], np.datetime64) ) assert post == [1, 0] def test_fam_array_get_any_d(): - a1 = np.array(("2023-01-05", "1854-05-02", "1988-01-01"), np.datetime64) + a1 = np.array(('2023-01-05', '1854-05-02', '1988-01-01'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) - post = fam.get_any(np.array(["2022-01", "2023-01", "1988-01"], np.datetime64)) + post = fam.get_any(np.array(['2022-01', '2023-01', '1988-01'], np.datetime64)) assert post == [] def test_fam_get_dt64_a(): - a1 = np.array(("2023", "1854", "1988"), np.datetime64) + a1 = np.array(('2023', '1854', '1988'), np.datetime64) a1.flags.writeable = False fam = FrozenAutoMap(a1) - k1 = np.datetime64("1988-01-01") + k1 = np.datetime64('1988-01-01') with pytest.raises(KeyError): _ = fam[k1] - k2 = np.datetime64("2023-01-01") + k2 = np.datetime64('2023-01-01') with pytest.raises(KeyError): _ = fam[k2] + def test_fam_get_dt64_b(): - a1 = np.array(("2023", "1854", "1988"), np.datetime64) + a1 = np.array(('2023', '1854', '1988'), np.datetime64) fam = FrozenAutoMap(list(a1)) - k1 = np.datetime64("1988-01-01") + k1 = np.datetime64('1988-01-01') with pytest.raises(KeyError): _ = fam[k1] - k2 = np.datetime64("2023-01-01") + k2 = np.datetime64('2023-01-01') with pytest.raises(KeyError): _ = fam[k2] def test_am_get_dt64_a(): - a1 = np.array(("2023", "1854", "1988"), np.datetime64) + a1 = np.array(('2023', '1854', '1988'), np.datetime64) a1.flags.writeable = False fam = AutoMap(a1) - k1 = np.datetime64("1988-01-01") + k1 = np.datetime64('1988-01-01') with pytest.raises(KeyError): _ = fam[k1] - k2 = np.datetime64("2023-01-01") + k2 = np.datetime64('2023-01-01') with pytest.raises(KeyError): _ = fam[k2] - diff --git a/test/test_auto_map_property.py b/test/test_auto_map_property.py index 006e70f1..e6d69ed2 100644 --- a/test/test_auto_map_property.py +++ b/test/test_auto_map_property.py @@ -19,8 +19,8 @@ Keys = tp.Set[tp.Union[int, str, float, bool, bytes, tp.Tuple[int, ...]]] -NATIVE_BYTE_ORDER = "<" if sys.byteorder == "little" else ">" -VALID_BYTE_ORDERS = ("=", NATIVE_BYTE_ORDER) +NATIVE_BYTE_ORDER = '<' if sys.byteorder == 'little' else '>' +VALID_BYTE_ORDERS = ('=', NATIVE_BYTE_ORDER) def get_array() -> st.SearchStrategy: @@ -29,9 +29,9 @@ def get_array() -> st.SearchStrategy: """ def proc(a: np.ndarray, contiguous: bool): - if a.dtype.kind in ("f", "c"): + if a.dtype.kind in ('f', 'c'): a = a[~np.isnan(a)] - elif a.dtype.kind in ("m", "M"): + elif a.dtype.kind in ('m', 'M'): a = a[~np.isnat(a)] if a.dtype.byteorder not in VALID_BYTE_ORDERS: @@ -48,9 +48,9 @@ def proc(a: np.ndarray, contiguous: bool): return a def strategy(contiguous: bool): - return arrays( - shape=1, unique=True, fill=st.nothing(), dtype=scalar_dtypes() - ).map(partial(proc, contiguous=contiguous)) + return arrays(shape=1, unique=True, fill=st.nothing(), dtype=scalar_dtypes()).map( + partial(proc, contiguous=contiguous) + ) return st.one_of( strategy(contiguous=True), @@ -192,7 +192,7 @@ def test_am_non_unique_exception(keys: Keys): @given(keys=get_array()) def test_fam_array_non_unique_exception(keys: Keys): with warnings.catch_warnings(): - warnings.simplefilter("ignore") + warnings.simplefilter('ignore') hypothesis.assume(keys.size) duplicate = next(iter(keys)) diff --git a/test/test_block_index.py b/test/test_block_index.py index 7185bf92..828b06e8 100644 --- a/test/test_block_index.py +++ b/test/test_block_index.py @@ -10,7 +10,6 @@ class TestUnit(unittest.TestCase): - def test_block_index_init_a(self) -> None: bi1 = BlockIndex() self.assertEqual(bi1.dtype, np.dtype(float)) @@ -30,7 +29,7 @@ def test_block_index_init_b1(self) -> None: def test_block_index_init_c1(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) + bi1.register(np.arange(12).reshape(2, 6)) bi1.register(np.arange(2)) block, row, bir_count, bir_capacity, bi, dt = bi1.__getstate__() @@ -40,19 +39,18 @@ def test_block_index_init_c1(self) -> None: def test_block_index_init_c2(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) + bi1.register(np.arange(12).reshape(2, 6)) bi1.register(np.arange(2)) block, row, bir_count, bir_capacity, bi, dt = bi1.__getstate__() with self.assertRaises(TypeError): bi2 = BlockIndex(block, row, bir_count, bir_capacity, bi, 'a') - def test_block_index_init_d(self) -> None: bi1 = BlockIndex() self.assertTrue('None' in repr(bi1)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_register_a(self) -> None: bi1 = BlockIndex() @@ -69,16 +67,14 @@ def test_block_index_register_b(self) -> None: bi1.register(np.array(0)) with self.assertRaises(ErrorInitTypeBlocks): - bi1.register(np.arange(12).reshape(2,3,2)) - + bi1.register(np.arange(12).reshape(2, 3, 2)) def test_block_index_register_c(self) -> None: bi1 = BlockIndex() bi1.register(np.array((3, 4, 5))) bi1.register(np.array((3, 4, 5))) - bi1.register(np.arange(6).reshape(3,2)) - self.assertEqual(bi1.to_list(), - [(0, 0), (1, 0), (2, 0), (2, 1)]) + bi1.register(np.arange(6).reshape(3, 2)) + self.assertEqual(bi1.to_list(), [(0, 0), (1, 0), (2, 0), (2, 1)]) self.assertEqual(bi1.shape, (3, 4)) self.assertEqual(bi1.rows, 3) self.assertEqual(bi1.columns, 4) @@ -86,12 +82,28 @@ def test_block_index_register_c(self) -> None: def test_block_index_register_d(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) - bi1.register(np.arange(12).reshape(2,6)) + bi1.register(np.arange(12).reshape(2, 6)) bi1.register(np.arange(2)) - bi1.register(np.arange(12).reshape(2,6)) - self.assertEqual(bi1.to_list(), - [(0, 0), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 0), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5)] - ) + bi1.register(np.arange(12).reshape(2, 6)) + self.assertEqual( + bi1.to_list(), + [ + (0, 0), + (1, 0), + (1, 1), + (1, 2), + (1, 3), + (1, 4), + (1, 5), + (2, 0), + (3, 0), + (3, 1), + (3, 2), + (3, 3), + (3, 4), + (3, 5), + ], + ) self.assertEqual(bi1.shape, (2, 14)) self.assertEqual(bi1.rows, 2) self.assertEqual(bi1.columns, 14) @@ -100,8 +112,7 @@ def test_block_index_register_e(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) with self.assertRaises(ErrorInitTypeBlocks): - bi1.register(np.arange(12).reshape(3,4)) - + bi1.register(np.arange(12).reshape(3, 4)) def test_block_index_register_f(self) -> None: bi1 = BlockIndex() @@ -110,7 +121,6 @@ def test_block_index_register_f(self) -> None: self.assertEqual(bi1.rows, 2) self.assertEqual(bi1.columns, 10_000) - def test_block_index_register_g(self) -> None: bi1 = BlockIndex() a1 = np.array(()).reshape(4, 0) @@ -124,7 +134,6 @@ def test_block_index_register_g(self) -> None: self.assertEqual(bi1.shape, (4, 2)) self.assertEqual(bi1.dtype, np.dtype(bool)) - def test_block_index_register_h(self) -> None: bi1 = BlockIndex() a1 = np.array(()).reshape(0, 4).astype(bool) @@ -143,7 +152,6 @@ def test_block_index_register_h(self) -> None: self.assertEqual(bi1.shape, (0, 7)) self.assertEqual(bi1.dtype, np.dtype(object)) - def test_block_index_register_i(self) -> None: bi1 = BlockIndex() # NOTE: this value in one context returned an unset exception; I think I have now covered those cases but cannot reproduce the failure; testing the full size is too slow, so reducing here as a placeholder @@ -151,31 +159,27 @@ def test_block_index_register_i(self) -> None: post = bi1.register(np.array(()).reshape(0, size)) self.assertEqual(bi1.shape, (0, size)) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_to_bytes_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(4).reshape(2,2)) - self.assertEqual(bi1.to_list(), - [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1)] - ) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(4).reshape(2, 2)) + self.assertEqual(bi1.to_list(), [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1)]) data = bi1.to_bytes() bd = ctypes.sizeof(ctypes.c_ssize_t) - post = [int.from_bytes( - data[slice(i, i+bd)], sys.byteorder, signed=True) for i in - range(0, len(data), bd) - ] + post = [ + int.from_bytes(data[slice(i, i + bd)], sys.byteorder, signed=True) + for i in range(0, len(data), bd) + ] self.assertEqual(post, [0, 0, 0, 1, 0, 2, 1, 0, 1, 1]) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_copy_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) s1 = bi1.shape bi2 = bi1.copy() self.assertEqual(bi1.to_list(), bi2.to_list()) @@ -185,7 +189,7 @@ def test_block_index_copy_a(self) -> None: def test_block_index_copy_b(self) -> None: dt1 = np.dtype(np.float64) - bi1 = BlockIndex(0, 2, 0, 8, b"", dt1) + bi1 = BlockIndex(0, 2, 0, 8, b'', dt1) bi2 = bi1.copy() dt2 = bi1.dtype del dt1 @@ -194,77 +198,74 @@ def test_block_index_copy_b(self) -> None: del bi2 self.assertEqual(dt2, np.dtype(np.float64)) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_sizeof_a(self) -> None: bi1 = BlockIndex() so1 = sys.getsizeof(bi1) - bi1.register(np.arange(100).reshape(2,50)) + bi1.register(np.arange(100).reshape(2, 50)) so2 = sys.getsizeof(bi1) self.assertTrue(so1 < so2) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_len_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) self.assertEqual(len(bi1), 8) def test_block_index_len_b(self) -> None: bi1 = BlockIndex() self.assertEqual(len(bi1), 0) - #--------------------------------------------------------------------------- - + # --------------------------------------------------------------------------- def test_block_index_getitem_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) self.assertEqual(bi1[3], (0, 3)) self.assertEqual(bi1[7], (1, 1)) with self.assertRaises(IndexError): bi1[8] - def test_block_index_getitem_b(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) # lookup by scalar a1 = np.array([3, 7]) self.assertEqual(bi1[a1[0]], (0, 3)) self.assertEqual(bi1[a1[1]], (1, 1)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_getitem_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) + bi1.register(np.arange(12).reshape(2, 6)) self.assertEqual(bi1.shape, (2, 6)) self.assertEqual(bi1.columns, 6) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) self.assertEqual(bi1.shape, (2, 8)) self.assertEqual(bi1.columns, 8) def test_block_index_getitem_b(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) with self.assertRaises(TypeError): bi1['a'] with self.assertRaises(TypeError): bi1[3:5] - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_get_state_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) block, row, bir_count, bir_capacity, bi, dt = bi1.__getstate__() @@ -275,11 +276,11 @@ def test_block_index_get_state_a(self) -> None: bi2 = BlockIndex(block, row, bir_count, bir_capacity, bi, dt) self.assertEqual(repr(bi1), repr(bi2)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_pickle_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) msg = pickle.dumps(bi1) @@ -288,7 +289,7 @@ def test_block_index_pickle_a(self) -> None: self.assertEqual(repr(bi1), repr(bi2)) self.assertEqual(bi1.to_list(), bi2.to_list()) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_dtype_a(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) @@ -308,12 +309,11 @@ def test_block_index_dtype_b(self) -> None: bi1.register(np.arange(2).astype(bool)) self.assertEqual(bi1.dtype, np.dtype(object)) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_get_block_a(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) bi1.register(np.arange(2)) self.assertEqual(bi1.get_block(6), 2) @@ -324,7 +324,7 @@ def test_block_index_get_block_a(self) -> None: def test_block_index_get_column_a(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) bi1.register(np.arange(2)) self.assertEqual(bi1.get_column(6), 0) @@ -332,12 +332,11 @@ def test_block_index_get_column_a(self) -> None: self.assertEqual(bi1.get_column(1), 0) self.assertEqual(bi1.get_column(0), 0) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_iter_a1(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) bi1.register(np.arange(2)) biit = iter(bi1) @@ -348,9 +347,9 @@ def test_block_index_iter_a1(self) -> None: def test_block_index_iter_a2(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) with self.assertRaises(TypeError): _ = bi1.iter_select(None) @@ -359,162 +358,152 @@ def test_block_index_iter_a2(self) -> None: _ = bi1.iter_select(np.array(['a', 'b'])) with self.assertRaises(TypeError): - _ = bi1.iter_select(np.arange(4).reshape(2,2)) + _ = bi1.iter_select(np.arange(4).reshape(2, 2)) def test_block_index_iter_b1(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) - biit1 = bi1.iter_select(np.array([0,3,4])) + biit1 = bi1.iter_select(np.array([0, 3, 4])) self.assertEqual(list(biit1), [(0, 0), (2, 0), (2, 1)]) self.assertEqual(list(reversed(biit1)), [(2, 1), (2, 0), (0, 0)]) - biit2 = bi1.iter_select(np.array([0,3,4], dtype=np.uint8)) + biit2 = bi1.iter_select(np.array([0, 3, 4], dtype=np.uint8)) self.assertEqual(list(biit2), [(0, 0), (2, 0), (2, 1)]) self.assertEqual(list(reversed(biit2)), [(2, 1), (2, 0), (0, 0)]) def test_block_index_iter_b2(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) - biit1 = bi1.iter_select(list(np.array([0,3,4]))) + biit1 = bi1.iter_select(list(np.array([0, 3, 4]))) self.assertEqual(list(biit1), [(0, 0), (2, 0), (2, 1)]) self.assertEqual(list(reversed(biit1)), [(2, 1), (2, 0), (0, 0)]) def test_block_index_iter_c(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) - biit1 = bi1.iter_select([0,3,4]) + biit1 = bi1.iter_select([0, 3, 4]) self.assertEqual(list(biit1), [(0, 0), (2, 0), (2, 1)]) self.assertEqual(list(reversed(biit1)), [(2, 1), (2, 0), (0, 0)]) - biit2 = bi1.iter_select([0,3,4]) + biit2 = bi1.iter_select([0, 3, 4]) self.assertEqual(list(biit2), [(0, 0), (2, 0), (2, 1)]) self.assertEqual(list(reversed(biit2)), [(2, 1), (2, 0), (0, 0)]) - def test_block_index_iter_d(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) with self.assertRaises(TypeError): - _ = list(bi1.iter_select([0,3,'b'])) - + _ = list(bi1.iter_select([0, 3, 'b'])) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_iter_select_slice_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) - self.assertEqual(list(bi1.iter_select((slice(None)))), - [(0, 0), (0, 1), (1, 0), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4)] - ) + self.assertEqual( + list(bi1.iter_select((slice(None)))), + [(0, 0), (0, 1), (1, 0), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4)], + ) - self.assertEqual(list(bi1.iter_select((slice(4, None)))), - [(2, 1), (2, 2), (2, 3), (2, 4)] - ) + self.assertEqual( + list(bi1.iter_select((slice(4, None)))), [(2, 1), (2, 2), (2, 3), (2, 4)] + ) - self.assertEqual(list(bi1.iter_select((slice(None)))), - [(0, 0), (0, 1), (1, 0), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4)] - ) + self.assertEqual( + list(bi1.iter_select((slice(None)))), + [(0, 0), (0, 1), (1, 0), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4)], + ) - self.assertEqual(list(bi1.iter_select((slice(1, 8, 2)))), - [(0, 1), (2, 0), (2, 2), (2, 4)] - ) + self.assertEqual( + list(bi1.iter_select((slice(1, 8, 2)))), [(0, 1), (2, 0), (2, 2), (2, 4)] + ) def test_block_index_iter_select_slice_b(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) - self.assertEqual(list(bi1.iter_select((slice(7, 3, -1)))), - [(2, 4), (2, 3), (2, 2), (2, 1)] - ) + self.assertEqual( + list(bi1.iter_select((slice(7, 3, -1)))), [(2, 4), (2, 3), (2, 2), (2, 1)] + ) - self.assertEqual(list(bi1.iter_select((slice(None, None, -1)))), - [(2, 4), (2, 3), (2, 2), (2, 1), (2, 0), (1, 0), (0, 1), (0, 0)] - ) + self.assertEqual( + list(bi1.iter_select((slice(None, None, -1)))), + [(2, 4), (2, 3), (2, 2), (2, 1), (2, 0), (1, 0), (0, 1), (0, 0)], + ) def test_block_index_iter_select_slice_c(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(6).reshape(2,3)) - - self.assertEqual(list(bi1.iter_select(slice(1,5))), - [(0, 1), (1, 0), (2, 0), (2, 1)] - ) + bi1.register(np.arange(6).reshape(2, 3)) - self.assertEqual(list(reversed(bi1.iter_select(slice(1,5)))), - [(2, 1), (2, 0), (1, 0), (0, 1)] - ) + self.assertEqual( + list(bi1.iter_select(slice(1, 5))), [(0, 1), (1, 0), (2, 0), (2, 1)] + ) + self.assertEqual( + list(reversed(bi1.iter_select(slice(1, 5)))), + [(2, 1), (2, 0), (1, 0), (0, 1)], + ) def test_block_index_iter_select_slice_d(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) bi1.register(np.arange(2)) - self.assertEqual(list(bi1.iter_select(slice(None))), - [(0, 0), (0, 1), (0, 2), (1, 0)] - ) - self.assertEqual(list(bi1.iter_select(slice(20, 24))), - [] - ) - self.assertEqual(list(bi1.iter_select(slice(0, 100, 10))), - [(0, 0)] - ) - self.assertEqual(list(bi1.iter_select(slice(0, 100, 3))), - [(0, 0), (1, 0)] - ) + self.assertEqual( + list(bi1.iter_select(slice(None))), [(0, 0), (0, 1), (0, 2), (1, 0)] + ) + self.assertEqual(list(bi1.iter_select(slice(20, 24))), []) + self.assertEqual(list(bi1.iter_select(slice(0, 100, 10))), [(0, 0)]) + self.assertEqual(list(bi1.iter_select(slice(0, 100, 3))), [(0, 0), (1, 0)]) def test_block_index_iter_select_slice_e(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(12).reshape(2,6)) - bi1.register(np.arange(12).reshape(2,6)) - - self.assertEqual(list(bi1.iter_select(slice(11, None, -3))), - [(1, 5), (1, 2), (0, 5), (0, 2)] - ) - self.assertEqual(list(bi1.iter_select(slice(11, None, -4))), - [(1, 5), (1, 1), (0, 3)] - ) + bi1.register(np.arange(12).reshape(2, 6)) + bi1.register(np.arange(12).reshape(2, 6)) + self.assertEqual( + list(bi1.iter_select(slice(11, None, -3))), [(1, 5), (1, 2), (0, 5), (0, 2)] + ) + self.assertEqual( + list(bi1.iter_select(slice(11, None, -4))), [(1, 5), (1, 1), (0, 3)] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_iter_select_boolean_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) sel1 = np.array([x % 2 == 0 for x in range(len(bi1))]) - self.assertEqual(list(bi1.iter_select(sel1)), - [(0, 0), (1, 0), (2, 1), (2, 3)] - ) + self.assertEqual(list(bi1.iter_select(sel1)), [(0, 0), (1, 0), (2, 1), (2, 3)]) sel2 = np.full(len(bi1), False) sel2[0] = True sel2[-1] = True - self.assertEqual(list(bi1.iter_select(sel2)), - [(0, 0), (2, 4)] - ) + self.assertEqual(list(bi1.iter_select(sel2)), [(0, 0), (2, 4)]) def test_block_index_iter_select_boolean_b(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) with self.assertRaises(TypeError): @@ -523,58 +512,52 @@ def test_block_index_iter_select_boolean_b(self) -> None: with self.assertRaises(TypeError): bi1.iter_select(np.full(20, True)) - def test_block_index_iter_select_boolean_c(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - self.assertEqual(list(bi1.iter_select(np.full(len(bi1), False))), - [] - ) - self.assertEqual(list(bi1.iter_select(np.full(len(bi1), True))), - [(0, 0), (0, 1), (1, 0)] - ) + self.assertEqual(list(bi1.iter_select(np.full(len(bi1), False))), []) + self.assertEqual( + list(bi1.iter_select(np.full(len(bi1), True))), [(0, 0), (0, 1), (1, 0)] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_iter_select_sequence_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) - self.assertEqual(list(bi1.iter_select([0, -1, -2, -8])), - [(0, 0), (2, 4), (2, 3), (0, 0)] - ) - self.assertEqual(list(bi1.iter_select(np.array([0, -1, -2, -8]))), - [(0, 0), (2, 4), (2, 3), (0, 0)] - ) + self.assertEqual( + list(bi1.iter_select([0, -1, -2, -8])), [(0, 0), (2, 4), (2, 3), (0, 0)] + ) + self.assertEqual( + list(bi1.iter_select(np.array([0, -1, -2, -8]))), + [(0, 0), (2, 4), (2, 3), (0, 0)], + ) def test_block_index_iter_select_sequence_b(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) with self.assertRaises(IndexError): _ = list(bi1.iter_select([-9])) - - def test_block_index_iter_select_sequence_c(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(4).reshape(2, 2)) bi1.register(np.arange(2)) - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) with self.assertRaises(TypeError): _ = list(bi1.iter_select(['b', 'c'])) - - def test_block_index_iter_select_sequence_d(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(10).reshape(2,5)) + bi1.register(np.arange(10).reshape(2, 5)) sel = [0, 3, 4] it1 = iter(bi1.iter_select(sel)) @@ -584,89 +567,80 @@ def test_block_index_iter_select_sequence_d(self) -> None: del it1 self.assertEqual(list(it2), [(0, 0), (0, 3), (0, 4)]) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_iter_contiguous_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) bi1.register(np.arange(2)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) bi1.register(np.arange(2)) self.assertEqual( - list(bi1.iter_contiguous([1,2,6,7])), - [(0, slice(1, 3, None)), (2, slice(2, 3, None)), (3, slice(0, 1, None))] - ) + list(bi1.iter_contiguous([1, 2, 6, 7])), + [(0, slice(1, 3, None)), (2, slice(2, 3, None)), (3, slice(0, 1, None))], + ) self.assertEqual( - list(bi1.iter_contiguous([7,6,2,1])), - [(3, slice(0, 1, None)), (2, slice(2, 3, None)), (0, slice(2, 0, -1))] - ) + list(bi1.iter_contiguous([7, 6, 2, 1])), + [(3, slice(0, 1, None)), (2, slice(2, 3, None)), (0, slice(2, 0, -1))], + ) self.assertEqual( list(bi1.iter_contiguous([7, 6, 2, 1], ascending=True)), - [(0, slice(1, 3, None)), (2, slice(2, 3, None)), (3, slice(0, 1, None))] - ) - + [(0, slice(1, 3, None)), (2, slice(2, 3, None)), (3, slice(0, 1, None))], + ) def test_block_index_iter_contiguous_b(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(16).reshape(2,8)) + bi1.register(np.arange(16).reshape(2, 8)) self.assertEqual( - list(bi1.iter_contiguous([0,1,6,7])), - [(0, slice(0, 2, None)), (0, slice(6, 8, None))] - ) - self.assertEqual( - list(bi1.iter_contiguous(slice(None))), - [(0, slice(0, 8, None))] - ) - self.assertEqual( - list(bi1.iter_contiguous(slice(1, 6))), - [(0, slice(1, 6, None))] - ) + list(bi1.iter_contiguous([0, 1, 6, 7])), + [(0, slice(0, 2, None)), (0, slice(6, 8, None))], + ) + self.assertEqual(list(bi1.iter_contiguous(slice(None))), [(0, slice(0, 8, None))]) + self.assertEqual(list(bi1.iter_contiguous(slice(1, 6))), [(0, slice(1, 6, None))]) self.assertEqual( list(bi1.iter_contiguous(slice(0, 8, 3))), - [(0, slice(0, 1, None)), (0, slice(3, 4, None)), (0, slice(6, 7, None))] - ) + [(0, slice(0, 1, None)), (0, slice(3, 4, None)), (0, slice(6, 7, None))], + ) self.assertEqual( list(bi1.iter_contiguous(slice(0, 8, 3), reduce=True)), - [(0, 0), (0, 3), (0, 6)] - ) + [(0, 0), (0, 3), (0, 6)], + ) def test_block_index_iter_contiguous_c(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(16).reshape(2,8)) + bi1.register(np.arange(16).reshape(2, 8)) with self.assertRaises(TypeError): - list(bi1.iter_contiguous([0,1,6,7], False)) - + list(bi1.iter_contiguous([0, 1, 6, 7], False)) def test_block_index_iter_contiguous_d(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(8).reshape(2,4)) - bi1.register(np.arange(8).reshape(2,4)) + bi1.register(np.arange(8).reshape(2, 4)) + bi1.register(np.arange(8).reshape(2, 4)) self.assertEqual( - list(bi1.iter_contiguous(slice(7,1,-1))), - [(1, slice(3, None, -1)), (0, slice(3, 1, -1))] - ) + list(bi1.iter_contiguous(slice(7, 1, -1))), + [(1, slice(3, None, -1)), (0, slice(3, 1, -1))], + ) self.assertEqual( - list(bi1.iter_contiguous(slice(7,1,-1), ascending=True)), - [(0, slice(2, 4)), (1, slice(0, 4))] - ) + list(bi1.iter_contiguous(slice(7, 1, -1), ascending=True)), + [(0, slice(2, 4)), (1, slice(0, 4))], + ) self.assertEqual( - list(bi1.iter_contiguous(slice(8,1,-1), ascending=True)), - [(0, slice(2, 4)), (1, slice(0, 4))] - ) + list(bi1.iter_contiguous(slice(8, 1, -1), ascending=True)), + [(0, slice(2, 4)), (1, slice(0, 4))], + ) self.assertEqual( - list(bi1.iter_contiguous(slice(8,None,-1), ascending=True)), - [(0, slice(0, 4)), (1, slice(0, 4))] - ) + list(bi1.iter_contiguous(slice(8, None, -1), ascending=True)), + [(0, slice(0, 4)), (1, slice(0, 4))], + ) def test_block_index_iter_contiguous_e1(self) -> None: bi1 = BlockIndex() @@ -681,21 +655,21 @@ def test_block_index_iter_contiguous_e1(self) -> None: self.assertEqual( list(bi1.iter_contiguous([6, 0, 7])), - [(6, slice(0, 1)), (0, slice(0, 1)), (7, slice(0, 1))] - ) + [(6, slice(0, 1)), (0, slice(0, 1)), (7, slice(0, 1))], + ) self.assertEqual( list(bi1.iter_contiguous([6, 0, 7], ascending=True)), - [(0, slice(0, 1)), (6, slice(0, 1)), (7, slice(0, 1))] - ) + [(0, slice(0, 1)), (6, slice(0, 1)), (7, slice(0, 1))], + ) self.assertEqual( list(bi1.iter_contiguous(np.array([6, 0, 7]))), - [(6, slice(0, 1)), (0, slice(0, 1)), (7, slice(0, 1))] - ) + [(6, slice(0, 1)), (0, slice(0, 1)), (7, slice(0, 1))], + ) self.assertEqual( list(bi1.iter_contiguous(np.array([6, 0, 7]), ascending=True)), - [(0, slice(0, 1)), (6, slice(0, 1)), (7, slice(0, 1))] - ) + [(0, slice(0, 1)), (6, slice(0, 1)), (7, slice(0, 1))], + ) def test_block_index_iter_contiguous_e2(self) -> None: bi1 = BlockIndex() @@ -709,69 +683,64 @@ def test_block_index_iter_contiguous_e2(self) -> None: bi1.register(np.arange(2)) self.assertEqual( - list(bi1.iter_contiguous([6, 0, 7], reduce=True)), - [(6, 0), (0, 0), (7, 0)] - ) + list(bi1.iter_contiguous([6, 0, 7], reduce=True)), [(6, 0), (0, 0), (7, 0)] + ) self.assertEqual( list(bi1.iter_contiguous([6, 0, 7], ascending=True, reduce=True)), - [(0, 0), (6, 0), (7, 0)] - ) + [(0, 0), (6, 0), (7, 0)], + ) self.assertEqual( list(bi1.iter_contiguous(np.array([6, 0, 7]), reduce=True)), - [(6, 0), (0, 0), (7, 0)] - ) + [(6, 0), (0, 0), (7, 0)], + ) self.assertEqual( list(bi1.iter_contiguous(np.array([6, 0, 7]), ascending=True, reduce=True)), - [(0, 0), (6, 0), (7, 0)] - ) - + [(0, 0), (6, 0), (7, 0)], + ) def test_block_index_iter_contiguous_f1(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(4).reshape(2, 2)) key = np.array([2, 3, 5]) def gen1(): yield from bi1.iter_select(key) + post1 = list(gen1()) self.assertEqual(post1, [(0, 2), (1, 0), (1, 2)]) - - def test_block_index_iter_contiguous_f2(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(4).reshape(2, 2)) key = np.array([2, 3, 5]) def gen2(): yield from bi1.iter_contiguous(key) + post2 = list(gen2()) post1 = list(bi1.iter_contiguous(key)) self.assertEqual(post1, post2) - - def test_block_index_iter_contiguous_g(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(4).reshape(2,2)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(4).reshape(2, 2)) with self.assertRaises(TypeError): _ = list(bi1.iter_contiguous('a')) - def test_block_index_iter_contiguous_h1(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) sel = np.array([1, 1, 1, 0, 0, 0]).astype(bool) post1 = list(bi1.iter_contiguous(sel)) @@ -781,29 +750,25 @@ def test_block_index_iter_contiguous_h1(self) -> None: def test_block_index_iter_contiguous_h2(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) sel = np.array([1, 0, 1, 0, 1, 0]).astype(bool) post1 = list(bi1.iter_contiguous(sel)) post2 = list(bi1.iter_contiguous(sel, ascending=True)) self.assertEqual(post1, post2) - self.assertEqual(post1, - [(0, slice(0, 1, None)), - (0, slice(2, 3, None)), - (1, slice(1, 2, None))]) + self.assertEqual( + post1, + [(0, slice(0, 1, None)), (0, slice(2, 3, None)), (1, slice(1, 2, None))], + ) post3 = list(bi1.iter_contiguous(sel, ascending=True, reduce=True)) - self.assertEqual(post3, - [(0, 0), - (0, 2), - (1, 1)]) - + self.assertEqual(post3, [(0, 0), (0, 2), (1, 1)]) def test_block_index_iter_contiguous_i1(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) self.assertEqual(list(bi1.iter_select(slice(0, 0))), []) self.assertEqual(list(bi1.iter_contiguous(slice(0, 0))), []) @@ -813,8 +778,8 @@ def test_block_index_iter_contiguous_i1(self) -> None: def test_block_index_iter_contiguous_i2(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) + bi1.register(np.arange(6).reshape(2, 3)) self.assertEqual(list(bi1.iter_select([])), []) self.assertEqual(list(bi1.iter_contiguous([])), []) @@ -822,26 +787,22 @@ def test_block_index_iter_contiguous_i2(self) -> None: self.assertEqual(list(bi1.iter_select(np.full(len(bi1), False))), []) self.assertEqual(list(bi1.iter_contiguous(np.full(len(bi1), False))), []) - - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_iter_block_a(self) -> None: bi1 = BlockIndex() - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) bi1.register(np.arange(2)) - bi1.register(np.arange(6).reshape(2,3)) + bi1.register(np.arange(6).reshape(2, 3)) slc = slice(None) self.assertEqual(list(bi1.iter_block()), [(0, slc), (1, slc), (2, slc)]) self.assertEqual(list(reversed(bi1.iter_block())), [(2, slc), (1, slc), (0, slc)]) - def test_block_index_iter_block_b(self) -> None: bi1 = BlockIndex() self.assertEqual(list(bi1.iter_block()), []) - def test_block_index_iter_block_c(self) -> None: bi1 = BlockIndex() bi1.register(np.arange(2)) @@ -856,16 +817,16 @@ def test_block_index_iter_block_c(self) -> None: slc = slice(None) self.assertEqual(list(bi1.iter_block()), [(i, slc) for i in range(8)]) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_block_index_shape_a(self) -> None: bi1 = BlockIndex() self.assertEqual(bi1.shape, (0, 0)) - self.assertEqual(bi1.rows, -1) # kept to show no assignemt + self.assertEqual(bi1.rows, -1) # kept to show no assignemt - bi1.register(np.array(()).reshape(2,0)) + bi1.register(np.array(()).reshape(2, 0)) self.assertEqual(bi1.shape, (2, 0)) self.assertEqual(bi1.rows, 2) with self.assertRaises(ErrorInitTypeBlocks): - bi1.register(np.array(()).reshape(3,0)) + bi1.register(np.array(()).reshape(3, 0)) diff --git a/test/test_delimited_to_arrays.py b/test/test_delimited_to_arrays.py index 8736e55b..a63e5a6e 100644 --- a/test/test_delimited_to_arrays.py +++ b/test/test_delimited_to_arrays.py @@ -8,7 +8,6 @@ class TestUnit(unittest.TestCase): - def test_iterable_str_to_array_1d_a(self) -> None: a1 = iterable_str_to_array_1d(['1', '3', '4'], int) self.assertEqual(a1.tolist(), [1, 3, 4]) @@ -21,7 +20,7 @@ def test_iterable_str_to_array_1d_a(self) -> None: with self.assertRaises(NotImplementedError): a3 = iterable_str_to_array_1d(['1', '3', '4'], object) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_bool_1(self) -> None: a1 = iterable_str_to_array_1d(['true', 'false', 'TRUE', 'FALSE'], bool) @@ -38,7 +37,6 @@ def test_iterable_str_to_array_1d_bool_2(self) -> None: # same as genfromtxt self.assertEqual(a1.tolist(), np.genfromtxt(src, dtype=bool).tolist()) - def test_iterable_str_to_array_1d_bool_3(self) -> None: a1 = iterable_str_to_array_1d(['sd', 'er', 'TRUE', 'twerwersdfsd'], bool) self.assertEqual(a1.tolist(), [False, False, True, False]) @@ -51,7 +49,7 @@ def test_iterable_str_to_array_1d_bool_4(self) -> None: self.assertEqual(a1.dtype, np.dtype(bool)) self.assertFalse(a1.flags.writeable) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_int_1(self) -> None: # NOTE: floats will be truncated @@ -68,20 +66,20 @@ def test_iterable_str_to_array_1d_int_2(self) -> None: self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_int_3a(self) -> None: - a1 = iterable_str_to_array_1d([ - str(9_223_372_036_854_775_807), - '0', - str(-9_223_372_036_854_775_808)], np.int64) + a1 = iterable_str_to_array_1d( + [str(9_223_372_036_854_775_807), '0', str(-9_223_372_036_854_775_808)], + np.int64, + ) self.assertEqual(a1.tolist(), [9223372036854775807, 0, -9223372036854775808]) self.assertEqual(a1.dtype, np.dtype(np.int64)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_int_3b(self) -> None: with self.assertRaises(TypeError): - _ = iterable_str_to_array_1d([ - str(9_223_372_036_854_775_808), - '0', - str(-9_223_372_036_854_775_809)], np.int64) + _ = iterable_str_to_array_1d( + [str(9_223_372_036_854_775_808), '0', str(-9_223_372_036_854_775_809)], + np.int64, + ) # self.assertEqual(a1.tolist(), [0, 0, 0]) # self.assertEqual(a1.dtype, np.dtype(np.int64)) @@ -95,39 +93,30 @@ def test_iterable_str_to_array_1d_int_4(self) -> None: self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_int_5(self) -> None: - a1 = iterable_str_to_array_1d([ - str(2_147_483_647), - '0', - str(-2_147_483_648)], np.int32) + a1 = iterable_str_to_array_1d( + [str(2_147_483_647), '0', str(-2_147_483_648)], np.int32 + ) self.assertEqual(a1.tolist(), [2147483647, 0, -2147483648]) self.assertEqual(a1.dtype, np.dtype(np.int32)) self.assertFalse(a1.flags.writeable) - def test_iterable_str_to_array_1d_int_6(self) -> None: - a1 = iterable_str_to_array_1d([ - str(2_147_483_647_000), - '0', - str(-2_147_483_648_000)], np.int32) + a1 = iterable_str_to_array_1d( + [str(2_147_483_647_000), '0', str(-2_147_483_648_000)], np.int32 + ) # NOTE: overflow characteristics may not be stable self.assertEqual(a1.tolist(), [-1000, 0, 0]) self.assertEqual(a1.dtype, np.dtype(np.int32)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_int_7(self) -> None: - a1 = iterable_str_to_array_1d([ - str(32_767), - '0', - str(-32_768)], np.int16) + a1 = iterable_str_to_array_1d([str(32_767), '0', str(-32_768)], np.int16) self.assertEqual(a1.tolist(), [32767, 0, -32768]) self.assertEqual(a1.dtype, np.dtype(np.int16)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_int_8(self) -> None: - a1 = iterable_str_to_array_1d([ - str(127), - '0', - str(-128)], np.int8) + a1 = iterable_str_to_array_1d([str(127), '0', str(-128)], np.int8) self.assertEqual(a1.tolist(), [127, 0, -128]) self.assertEqual(a1.dtype, np.dtype(np.int8)) self.assertFalse(a1.flags.writeable) @@ -137,27 +126,31 @@ def test_iterable_str_to_array_1d_int_9(self) -> None: _ = iterable_str_to_array_1d(['3', '4', 'foo'], int) def test_iterable_str_to_array_1d_int_10(self) -> None: - a1 = iterable_str_to_array_1d(['3', '4', '1']) # no dtype argument + a1 = iterable_str_to_array_1d(['3', '4', '1']) # no dtype argument self.assertEqual(a1.tolist(), [3, 4, 1]) def test_iterable_str_to_array_1d_int_11(self) -> None: - a1 = iterable_str_to_array_1d(['3,000', '4,000', '1,000'], - dtype=int, - thousandschar=',', - ) + a1 = iterable_str_to_array_1d( + ['3,000', '4,000', '1,000'], + dtype=int, + thousandschar=',', + ) self.assertEqual(a1.tolist(), [3000, 4000, 1000]) def test_iterable_str_to_array_1d_int_12(self) -> None: - a1 = iterable_str_to_array_1d(['3.000', '4.000', '1.000'], dtype=int, thousandschar='.') + a1 = iterable_str_to_array_1d( + ['3.000', '4.000', '1.000'], dtype=int, thousandschar='.' + ) self.assertEqual(a1.tolist(), [3000, 4000, 1000]) def test_iterable_str_to_array_1d_int_13(self) -> None: # TypeError: error parsing integer with self.assertRaises(TypeError): - a1 = iterable_str_to_array_1d(['3.000', '4.000', '1.000'], dtype=int, thousandschar=',') - + a1 = iterable_str_to_array_1d( + ['3.000', '4.000', '1.000'], dtype=int, thousandschar=',' + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_uint_1(self) -> None: a1 = iterable_str_to_array_1d(['23', '54', ' 1000', '23 '], np.uint64) @@ -173,16 +166,23 @@ def test_iterable_str_to_array_1d_uint_2a(self) -> None: def test_iterable_str_to_array_1d_uint_2b(self) -> None: with self.assertRaises(TypeError): - _ = iterable_str_to_array_1d([str(18_446_744_073_709_551_616), '0'], np.uint64) + _ = iterable_str_to_array_1d( + [str(18_446_744_073_709_551_616), '0'], np.uint64 + ) def test_iterable_str_to_array_1d_uint_3(self) -> None: - a1 = iterable_str_to_array_1d([ + a1 = iterable_str_to_array_1d( + [ str(18_446_744_073_709_551), str(18_446_744_073_709_551_6), str(18_446_744_073_709_551_61), - '0'], np.uint64) - self.assertEqual(a1.tolist(), - [18446744073709551, 184467440737095516, 1844674407370955161, 0]) + '0', + ], + np.uint64, + ) + self.assertEqual( + a1.tolist(), [18446744073709551, 184467440737095516, 1844674407370955161, 0] + ) self.assertEqual(a1.dtype, np.dtype(np.uint64)) self.assertFalse(a1.flags.writeable) @@ -205,50 +205,56 @@ def test_iterable_str_to_array_1d_uint_4(self) -> None: self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_uint_5(self) -> None: - a1 = iterable_str_to_array_1d(['3,000', '4,000', '1,000'], - dtype=np.uint64, - thousandschar=',') + a1 = iterable_str_to_array_1d( + ['3,000', '4,000', '1,000'], dtype=np.uint64, thousandschar=',' + ) self.assertEqual(a1.tolist(), [3000, 4000, 1000]) def test_iterable_str_to_array_1d_uint_6(self) -> None: - a1 = iterable_str_to_array_1d(['3.000', '4.000', '1.000'], dtype=np.uint64, thousandschar='.') + a1 = iterable_str_to_array_1d( + ['3.000', '4.000', '1.000'], dtype=np.uint64, thousandschar='.' + ) self.assertEqual(a1.tolist(), [3000, 4000, 1000]) def test_iterable_str_to_array_1d_uint_7(self) -> None: # TypeError: error parsing integer with self.assertRaises(TypeError): - a1 = iterable_str_to_array_1d(['3.000', '4.000', '1.000'], dtype=np.uint64, thousandschar=',') + a1 = iterable_str_to_array_1d( + ['3.000', '4.000', '1.000'], dtype=np.uint64, thousandschar=',' + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_float_1(self) -> None: a1 = iterable_str_to_array_1d(['23.1', '54.5', '1000.2', '23.'], float) - self.assertEqual(a1.tolist(),[23.1, 54.5, 1000.2, 23.0]) + self.assertEqual(a1.tolist(), [23.1, 54.5, 1000.2, 23.0]) self.assertEqual(a1.dtype, np.dtype(np.float64)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_float_2(self) -> None: - a1 = iterable_str_to_array_1d(['23.1', ' 54.5 ', ' 1000.2', '23. '], float) - self.assertEqual(a1.tolist(),[23.1, 54.5, 1000.2, 23.0]) + a1 = iterable_str_to_array_1d( + ['23.1', ' 54.5 ', ' 1000.2', '23. '], float + ) + self.assertEqual(a1.tolist(), [23.1, 54.5, 1000.2, 23.0]) self.assertEqual(a1.dtype, np.dtype(np.float64)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_float_3(self) -> None: a1 = iterable_str_to_array_1d(['23.1', ' 54', ' 1000.2', '23'], float) - self.assertEqual(a1.tolist(),[23.1, 54.0, 1000.2, 23.0]) + self.assertEqual(a1.tolist(), [23.1, 54.0, 1000.2, 23.0]) self.assertEqual(a1.dtype, np.dtype(np.float64)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_float_4(self) -> None: a1 = iterable_str_to_array_1d(['23', ' 54', ' 1000', '23'], float) - self.assertEqual(a1.tolist(),[23.0, 54.0, 1000.0, 23.0]) + self.assertEqual(a1.tolist(), [23.0, 54.0, 1000.0, 23.0]) self.assertEqual(a1.dtype, np.dtype(np.float64)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_float_5(self) -> None: a1 = iterable_str_to_array_1d(['inf', ' nan', ' 1e-200', '1.5e34'], float) self.assertEqual(str(a1[:3].tolist()), '[inf, nan, 1e-200]') - self.assertTrue((a1[3] - 1.5e34) < 1e-18) # noise! + self.assertTrue((a1[3] - 1.5e34) < 1e-18) # noise! self.assertEqual(a1.dtype, np.dtype(np.float64)) self.assertFalse(a1.flags.writeable) @@ -275,23 +281,24 @@ def test_iterable_str_to_array_1d_float_9(self) -> None: def test_iterable_str_to_array_1d_float_10(self) -> None: a1 = iterable_str_to_array_1d(['23,1', '1000,2'], dtype=float, decimalchar=',') - self.assertEqual(a1.tolist(),[23.1, 1000.2]) + self.assertEqual(a1.tolist(), [23.1, 1000.2]) self.assertEqual(a1.dtype, np.dtype(np.float64)) self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_float_11(self) -> None: with self.assertRaises(TypeError): - a1 = iterable_str_to_array_1d(['23.1', '1000.2'], dtype=float, decimalchar=',') - self.assertEqual(a1.tolist(),[23.1, 1000.2]) + a1 = iterable_str_to_array_1d( + ['23.1', '1000.2'], dtype=float, decimalchar=',' + ) + self.assertEqual(a1.tolist(), [23.1, 1000.2]) def test_iterable_str_to_array_1d_float_12(self) -> None: a1 = iterable_str_to_array_1d(['23.1', '54.5', '1000.2', '23.'], np.float16) - self.assertEqual(a1.tolist(),[23.09375, 54.5, 1000.0, 23.0]) + self.assertEqual(a1.tolist(), [23.09375, 54.5, 1000.0, 23.0]) self.assertEqual(a1.dtype, np.dtype(np.float16)) self.assertFalse(a1.flags.writeable) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_str_1(self) -> None: a1 = iterable_str_to_array_1d([' sdf ', ' we', 'aaa', 'qqqqq '], str) @@ -330,15 +337,19 @@ def test_iterable_str_to_array_1d_str_6(self) -> None: self.assertEqual(a1.tolist(), ['aa', 'bbb', 'cccc', ' ddd']) def test_iterable_str_to_array_1d_str_7(self) -> None: - a1 = iterable_str_to_array_1d(['aa', 'bbb', 'ccccc', ' dddd ', ''], np.dtype(' None: - a1 = iterable_str_to_array_1d(['aa', 'bbb', 'ccccc', 'dddddd', ''], np.dtype('|S3')) + a1 = iterable_str_to_array_1d( + ['aa', 'bbb', 'ccccc', 'dddddd', ''], np.dtype('|S3') + ) self.assertEqual(a1.dtype.str, '|S3') self.assertEqual(a1.tolist(), [b'aa', b'bbb', b'ccc', b'ddd', b'']) self.assertFalse(a1.flags.writeable) @@ -350,17 +361,19 @@ def test_iterable_str_to_array_1d_bytes_2(self) -> None: self.assertFalse(a1.flags.writeable) def test_iterable_str_to_array_1d_bytes_3(self) -> None: - a1 = iterable_str_to_array_1d(['aa', 'bbb', 'ccccc', 'dddddd', ''], np.dtype('|S1')) + a1 = iterable_str_to_array_1d( + ['aa', 'bbb', 'ccccc', 'dddddd', ''], np.dtype('|S1') + ) self.assertEqual(a1.dtype.str, '|S1') self.assertEqual(a1.tolist(), [b'a', b'b', b'c', b'd', b'']) self.assertFalse(a1.flags.writeable) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_complex_1(self) -> None: a1 = iterable_str_to_array_1d(['(3+0j)', '(100+0j)'], complex) self.assertEqual(a1.dtype, np.dtype(complex)) - self.assertEqual(a1.tolist(), [(3+0j), (100+0j)]) + self.assertEqual(a1.tolist(), [(3 + 0j), (100 + 0j)]) def test_iterable_str_to_array_1d_complex_2(self) -> None: a1 = iterable_str_to_array_1d(['3+0j', '100+nanj'], complex) @@ -369,7 +382,7 @@ def test_iterable_str_to_array_1d_complex_2(self) -> None: def test_iterable_str_to_array_1d_complex_3(self) -> None: a1 = iterable_str_to_array_1d(['-2+1.2j', '1.5+4.2j'], complex) self.assertEqual(a1.dtype, np.dtype(complex)) - self.assertEqual(a1.tolist(), [(-2+1.2j), (1.5+4.2j)]) + self.assertEqual(a1.tolist(), [(-2 + 1.2j), (1.5 + 4.2j)]) def test_iterable_str_to_array_1d_complex_4(self) -> None: a1 = iterable_str_to_array_1d(['(-0+infj)', '0j'], complex) @@ -385,31 +398,37 @@ def test_iterable_str_to_array_1d_complex_6(self) -> None: with self.assertRaises(ValueError): a1 = iterable_str_to_array_1d(['-2+1.2asdfj', '1.5wer4.2j'], complex) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_dt64_1(self) -> None: a1 = iterable_str_to_array_1d(['2020-01-01', '2020-02-01'], 'datetime64[D]') self.assertEqual(a1.dtype, np.dtype(' None: a1 = iterable_str_to_array_1d(['2020-01-01', '2020-02-01'], np.datetime64) self.assertEqual(a1.dtype, np.dtype(' None: a1 = iterable_str_to_array_1d(['2020-01-01', '2020-02-01'], np.datetime64) self.assertEqual(a1.dtype, np.dtype(' None: with self.assertRaises(ValueError): _ = iterable_str_to_array_1d(['202.30', '202.20'], 'datetime64[D]') - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_parse_1(self) -> None: a1 = iterable_str_to_array_1d(['20', '30'], None) @@ -435,14 +454,12 @@ def test_iterable_str_to_array_1d_parse_4(self) -> None: self.assertFalse(a1.flags.writeable) self.assertEqual(a1.tolist(), ['b', 'ee', 't']) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_raise_a(self) -> None: with self.assertRaises(TypeError): a1 = iterable_str_to_array_1d([3, 4, 5], None) - #--------------------------------------------------------------------------- - + # --------------------------------------------------------------------------- def test_iterable_str_to_array_1d_empty_a(self) -> None: # an empty string is an nan float @@ -452,13 +469,13 @@ def test_iterable_str_to_array_1d_empty_a(self) -> None: post2 = iterable_str_to_array_1d(['', '', ''], None) self.assertEqual(post2.tolist(), ['', '', '']) -# + def test_iterable_str_to_array_1d_empty_b(self) -> None: -# + # with self.assertRaises(ValueError): -# + # an empty string is an invalid identifier for -# + with self.assertRaises(TypeError): -# + _ = iterable_str_to_array_1d(['', '', '3'], int) -# + - #--------------------------------------------------------------------------- + # + def test_iterable_str_to_array_1d_empty_b(self) -> None: + # + # with self.assertRaises(ValueError): + # + # an empty string is an invalid identifier for + # + with self.assertRaises(TypeError): + # + _ = iterable_str_to_array_1d(['', '', '3'], int) + # + + # --------------------------------------------------------------------------- def test_delimited_to_arrays_a(self) -> None: @@ -480,7 +497,6 @@ def test_delimited_to_arrays_a(self) -> None: self.assertEqual(len(post1), 4) self.assertTrue(all(len(e) == 3 for e in post1)) - def test_delimited_to_arrays_b(self) -> None: msg = [ @@ -495,7 +511,6 @@ def test_delimited_to_arrays_b(self) -> None: self.assertEqual(len(post0), 40) self.assertTrue(all(len(e) == 3 for e in post0)) - def test_delimited_to_arrays_c(self) -> None: msg = [ @@ -510,8 +525,51 @@ def test_delimited_to_arrays_c(self) -> None: self.assertEqual(len(post0), 40) self.assertTrue(all(len(e) == 3 for e in post0)) - self.assertEqual([a.tolist() for a in post0], - [[True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343]]) + self.assertEqual( + [a.tolist() for a in post0], + [ + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + ], + ) def test_delimited_to_arrays_d(self) -> None: @@ -526,8 +584,51 @@ def test_delimited_to_arrays_d(self) -> None: self.assertTrue(isinstance(post0, list)) self.assertEqual(len(post0), 40) self.assertTrue(all(len(e) == 3 for e in post0)) - self.assertEqual([a.tolist() for a in post0], - [[True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343], [True, True, False], [10, -2000, 82342343]]) + self.assertEqual( + [a.tolist() for a in post0], + [ + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + [True, True, False], + [10, -2000, 82342343], + ], + ) def test_delimited_to_arrays_e(self) -> None: @@ -542,8 +643,71 @@ def test_delimited_to_arrays_e(self) -> None: self.assertTrue(isinstance(post0, list)) self.assertEqual(len(post0), 60) self.assertTrue(all(len(e) == 3 for e in post0)) - self.assertEqual([x.dtype.str for x in post0], - ['|b1', ' None: @@ -556,11 +720,20 @@ def test_delimited_to_arrays_f(self) -> None: dtypes0 = ([' None: @@ -572,34 +745,39 @@ def test_delimited_to_arrays_g(self) -> None: dtypes0 = ([bool, int, float, str]).__getitem__ post0 = delimited_to_arrays(msg, dtypes=dtypes0, axis=1) - self.assertEqual([a.dtype.kind for a in post0], - ['b', 'i', 'f', 'U']) + self.assertEqual([a.dtype.kind for a in post0], ['b', 'i', 'f', 'U']) - self.assertEqual([a.tolist() for a in post0], - [[False, True, True], - [100, 200, -234], - [np.inf, 6.5, 3.2e-10], - ['red', 'blue', 'green']]) + self.assertEqual( + [a.tolist() for a in post0], + [ + [False, True, True], + [100, 200, -234], + [np.inf, 6.5, 3.2e-10], + ['red', 'blue', 'green'], + ], + ) def test_delimited_to_arrays_h(self) -> None: msg = [ - 0, 1, - 2, 3, + 0, + 1, + 2, + 3, ] with self.assertRaises(RuntimeError): _ = delimited_to_arrays(msg, axis=1) - def test_delimited_to_arrays_i(self) -> None: msg = [ - b'a', b'b', - b'c', b'd', + b'a', + b'b', + b'c', + b'd', ] with self.assertRaises(RuntimeError): _ = delimited_to_arrays(msg, axis=1) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_parse_a(self) -> None: msg = [ @@ -609,14 +787,17 @@ def test_delimited_to_arrays_parse_a(self) -> None: ] post0 = delimited_to_arrays(msg, dtypes=None, axis=1) - self.assertEqual([a.dtype.kind for a in post0], - ['b', 'i', 'f', 'U']) + self.assertEqual([a.dtype.kind for a in post0], ['b', 'i', 'f', 'U']) - self.assertEqual([a.tolist() for a in post0], - [[False, True, True], - [100, 200, -234], - [np.inf, 6.5, 3.2e-10], - [' red', ' blue', ' green']]) + self.assertEqual( + [a.tolist() for a in post0], + [ + [False, True, True], + [100, 200, -234], + [np.inf, 6.5, 3.2e-10], + [' red', ' blue', ' green'], + ], + ) def test_delimited_to_arrays_parse_b(self) -> None: msg = ['0j', '(-0+infj)'] @@ -629,17 +810,17 @@ def test_delimited_to_arrays_parse_c(self) -> None: 'false, 10, inf', 'true, 20, 6.5', 'True, -24, 3.2e-10', - ] + ] dtypes = [None, np.int16, None].__getitem__ post1 = delimited_to_arrays(msg, dtypes=dtypes, axis=1) - self.assertEqual([a.dtype.str for a in post1], ['|b1', ' None: msg = [ 'false, 10, inf', 'true, 20, 6.5', - ] + ] dtypes = [None].__getitem__ with self.assertRaises(RuntimeError): post1 = delimited_to_arrays(msg, dtypes=dtypes, axis=1) @@ -649,53 +830,52 @@ def test_delimited_to_arrays_parse_e1(self) -> None: msg = [ 'a, "10", "foo"', 'b, "20", "bar', - ] + ] with self.assertRaises(NotImplementedError): - post1 = delimited_to_arrays(msg, - dtypes=[str, int, 'V'].__getitem__, - axis=1, - quoting=csv.QUOTE_ALL, - skipinitialspace=True, - ) + post1 = delimited_to_arrays( + msg, + dtypes=[str, int, 'V'].__getitem__, + axis=1, + quoting=csv.QUOTE_ALL, + skipinitialspace=True, + ) def test_delimited_to_arrays_parse_e2(self) -> None: msg = [ 'a, "10", "foo"', 'b, "20", "bar', - ] - post1 = delimited_to_arrays(msg, - dtypes=[str, int, str].__getitem__, - axis=1, - quoting=csv.QUOTE_ALL, - skipinitialspace=True, - ) - self.assertEqual([x.tolist() for x in post1], - [['a', 'b'], [10, 20], ['foo', 'bar']]) - + ] + post1 = delimited_to_arrays( + msg, + dtypes=[str, int, str].__getitem__, + axis=1, + quoting=csv.QUOTE_ALL, + skipinitialspace=True, + ) + self.assertEqual( + [x.tolist() for x in post1], [['a', 'b'], [10, 20], ['foo', 'bar']] + ) def test_delimited_to_arrays_parse_f(self) -> None: msg = [ 'a, 10, foo', 'b, 20, bar', - ] + ] dtypes = [str, int].__getitem__ # dtypes fails for argument 2 with self.assertRaises(RuntimeError): _ = delimited_to_arrays(msg, axis=1, dtypes=dtypes) - def test_delimited_to_arrays_parse_g(self) -> None: msg = [ 'a,10,foo', 'b,20,\0', - ] + ] # if a null character is encountered it used to raise; this seemed unnecessary post = delimited_to_arrays(msg, axis=1) - self.assertEqual( [a.tolist() for a in post], - [['a', 'b'], [10, 20], ['foo', '']] - ) + self.assertEqual([a.tolist() for a in post], [['a', 'b'], [10, 20], ['foo', '']]) def test_delimited_to_arrays_parse_h(self) -> None: msg = [',0', 'False,1'] @@ -706,207 +886,243 @@ def test_delimited_to_arrays_parse_h(self) -> None: post2 = delimited_to_arrays(msg, axis=1, dtypes=[str, None].__getitem__) self.assertEqual([a.tolist() for a in post2], [['', 'False'], [0, 1]]) - def test_delimited_to_arrays_parse_i(self) -> None: msg = [ 'a, 10, foo', 'b, 20, c', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=False) - self.assertEqual([a.tolist() for a in post1], [['a', 'b'], [10, 20], [' foo', ' c']]) + self.assertEqual( + [a.tolist() for a in post1], [['a', 'b'], [10, 20], [' foo', ' c']] + ) post2 = delimited_to_arrays(msg, axis=1, skipinitialspace=True) - self.assertEqual([a.tolist() for a in post2], [['a', 'b'], [10, 20], ['foo', 'c']]) + self.assertEqual( + [a.tolist() for a in post2], [['a', 'b'], [10, 20], ['foo', 'c']] + ) def test_delimited_to_arrays_parse_j(self) -> None: msg = [ '2021,2021-04-01,4', '2022,2022-05-01,3', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=False) - self.assertEqual([a.tolist() for a in post1], [[2021, 2022], ['2021-04-01', '2022-05-01'], [4, 3]]) - + self.assertEqual( + [a.tolist() for a in post1], + [[2021, 2022], ['2021-04-01', '2022-05-01'], [4, 3]], + ) def test_delimited_to_arrays_parse_k(self) -> None: msg = [ '2021,2021-04,4', '2022,2022-05,3', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=False) - self.assertEqual([a.tolist() for a in post1], [[2021, 2022], ['2021-04', '2022-05'], [4, 3]]) - + self.assertEqual( + [a.tolist() for a in post1], [[2021, 2022], ['2021-04', '2022-05'], [4, 3]] + ) def test_delimited_to_arrays_parse_l(self) -> None: msg = [ '1,2,3', '2-,2-0,-3', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=False) - self.assertEqual([a.tolist() for a in post1], [['1', '2-'], ['2', '2-0'], [3, -3]]) + self.assertEqual( + [a.tolist() for a in post1], [['1', '2-'], ['2', '2-0'], [3, -3]] + ) def test_delimited_to_arrays_parse_m(self) -> None: msg = [ ' 1, 2,3', ' 2-, 2-0, -3', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=False) - self.assertEqual([a.tolist() for a in post1], [[' 1', ' 2-'], [' 2', ' 2-0'], [3, -3]]) + self.assertEqual( + [a.tolist() for a in post1], [[' 1', ' 2-'], [' 2', ' 2-0'], [3, -3]] + ) def test_delimited_to_arrays_parse_n(self) -> None: msg = ['1,2,1', '3,4,5'] post1 = delimited_to_arrays(msg, axis=1) # NOTE: automatic type parsing should always give an int64 - self.assertEqual([str(a.dtype) for a in post1], - ['int64', 'int64', 'int64']) + self.assertEqual([str(a.dtype) for a in post1], ['int64', 'int64', 'int64']) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_float_a(self) -> None: msg = [ '1.2, 5.4, 9.2', ' 3.5 , 2.3 , 6.3 ', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=True) - self.assertEqual([a.round(1).tolist() for a in post1], - [[1.2, 3.5], [5.4, 2.3], [9.2, 6.3]] - ) + self.assertEqual( + [a.round(1).tolist() for a in post1], [[1.2, 3.5], [5.4, 2.3], [9.2, 6.3]] + ) def test_delimited_to_arrays_float_b(self) -> None: msg = [ '1.2, inf , 9.2', ' 3.5 , 2.3 , -inf ', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=True) - self.assertEqual([a.round(1).tolist() for a in post1], - [[1.2, 3.5], [np.inf, 2.3], [9.2, -np.inf]] - ) + self.assertEqual( + [a.round(1).tolist() for a in post1], + [[1.2, 3.5], [np.inf, 2.3], [9.2, -np.inf]], + ) def test_delimited_to_arrays_float_c(self) -> None: msg = [ '1.2, nan , 9.2', ' 3.5 , 2.3 , -nan ', - ] + ] post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=True) self.assertEqual( - [str(a.round(1).tolist()) for a in post1], - ['[1.2, 3.5]', '[nan, 2.3]', '[9.2, nan]'] - ) + [str(a.round(1).tolist()) for a in post1], + ['[1.2, 3.5]', '[nan, 2.3]', '[9.2, nan]'], + ) def test_delimited_to_arrays_float_d(self) -> None: msg = [ '1,2;9,2', '3,5;2,3', - ] + ] post1 = delimited_to_arrays(msg, axis=1, decimalchar=',', delimiter=';') self.assertEqual( - [str(a.round(1).tolist()) for a in post1], - ['[1.2, 3.5]', '[9.2, 2.3]'] - ) + [str(a.round(1).tolist()) for a in post1], ['[1.2, 3.5]', '[9.2, 2.3]'] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_int_a(self) -> None: msg = [ '12, 54, "9,200"', ' 35 , 23 , "6,300" ', - ] + ] dtypes = lambda x: int with self.assertRaises(TypeError): - _ = delimited_to_arrays(msg, axis=1, skipinitialspace=True, dtypes=dtypes, quoting=csv.QUOTE_MINIMAL) + _ = delimited_to_arrays( + msg, + axis=1, + skipinitialspace=True, + dtypes=dtypes, + quoting=csv.QUOTE_MINIMAL, + ) # this fails until we accept the thousands character - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_quoting_a(self) -> None: - msg = ['a,3,True', 'b,-1,False'] + msg = ['a,3,True', 'b,-1,False'] post1 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_MINIMAL) - self.assertEqual([a.tolist() for a in post1], [['a', 'b'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], [['a', 'b'], [3, -1], [True, False]] + ) post2 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_ALL) - self.assertEqual([a.tolist() for a in post2], [['a', 'b'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post2], [['a', 'b'], [3, -1], [True, False]] + ) post3 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_NONE) - self.assertEqual([a.tolist() for a in post3], [['a', 'b'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post3], [['a', 'b'], [3, -1], [True, False]] + ) # this is supported but has no effect post4 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_NONNUMERIC) - self.assertEqual([a.tolist() for a in post4], [['a', 'b'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post4], [['a', 'b'], [3, -1], [True, False]] + ) def test_delimited_to_arrays_quoting_b(self) -> None: - msg = ['"fo,o",3,True', '"ba,r",-1,False'] + msg = ['"fo,o",3,True', '"ba,r",-1,False'] post1 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_MINIMAL) - self.assertEqual([a.tolist() for a in post1], [['fo,o', 'ba,r'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], [['fo,o', 'ba,r'], [3, -1], [True, False]] + ) post2 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_ALL) - self.assertEqual([a.tolist() for a in post2], [['fo,o', 'ba,r'], [3, -1], [True, False]]) - + self.assertEqual( + [a.tolist() for a in post2], [['fo,o', 'ba,r'], [3, -1], [True, False]] + ) def test_delimited_to_arrays_quoting_c(self) -> None: - msg = ['"fo,o",3,True', '"ba,r",-1,False'] + msg = ['"fo,o",3,True', '"ba,r",-1,False'] post1 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_NONE) # NOTE: with quoting disabled, we observe the comma as a delimiter - self.assertEqual([a.tolist() for a in post1], [['"fo', '"ba'], ['o"', 'r"'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], + [['"fo', '"ba'], ['o"', 'r"'], [3, -1], [True, False]], + ) def test_delimited_to_arrays_quoting_d(self) -> None: - msg = ['"foo","3","True"', '"bar","-1","False"'] + msg = ['"foo","3","True"', '"bar","-1","False"'] # with QUOTE_NONE, all remain string types post1 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_NONE) - self.assertEqual([a.tolist() for a in post1], [['"foo"', '"bar"'], ['"3"', '"-1"'], ['"True"', '"False"']]) + self.assertEqual( + [a.tolist() for a in post1], + [['"foo"', '"bar"'], ['"3"', '"-1"'], ['"True"', '"False"']], + ) # with QUOTE_ALL, quotes are stripped, types are evaluated post2 = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_ALL) - self.assertEqual([a.tolist() for a in post2], [['foo', 'bar'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post2], [['foo', 'bar'], [3, -1], [True, False]] + ) # import ipdb; ipdb.set_trace() def test_delimited_to_arrays_quoting_e(self) -> None: - msg = ['a,3,True', 'b,-1,False'] + msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(TypeError): _ = delimited_to_arrays(msg, axis=1, quoting=20) with self.assertRaises(TypeError): - _ = delimited_to_arrays(msg, axis=1, quoting="foo") + _ = delimited_to_arrays(msg, axis=1, quoting='foo') - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_delimiter_a(self) -> None: - msg = ['a,3,True', 'b,-1,False'] + msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(TypeError): _ = delimited_to_arrays(msg, axis=1, delimiter='foo') def test_delimited_to_arrays_delimiter_b(self) -> None: - msg = ['a,3,True', 'b,-1,False'] + msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(TypeError): _ = delimited_to_arrays(msg, axis=1, delimiter=None) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_escapechar_a(self) -> None: - msg = ['a,3,True', 'b,-1,False'] + msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(TypeError): _ = delimited_to_arrays(msg, axis=1, escapechar='foo') def test_delimited_to_arrays_escapechar_b(self) -> None: - msg = ['f/"oo,3,True', 'b/,ar,-1,False'] - post1 = delimited_to_arrays(msg, - axis=1, - escapechar='/', - quoting=csv.QUOTE_NONE, - doublequote=False, - ) + msg = ['f/"oo,3,True', 'b/,ar,-1,False'] + post1 = delimited_to_arrays( + msg, + axis=1, + escapechar='/', + quoting=csv.QUOTE_NONE, + doublequote=False, + ) - self.assertEqual([a.tolist() for a in post1], - [['f"oo', 'b,ar'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], [['f"oo', 'b,ar'], [3, -1], [True, False]] + ) def test_delimited_to_arrays_escapechar_c(self) -> None: - msg = ['foo,3,True', 'bar,-1,False'] - post1 = delimited_to_arrays(msg, - axis=1, - escapechar=None, - ) - self.assertEqual([a.tolist() for a in post1], - [['foo', 'bar'], [3, -1], [True, False]]) + msg = ['foo,3,True', 'bar,-1,False'] + post1 = delimited_to_arrays( + msg, + axis=1, + escapechar=None, + ) + self.assertEqual( + [a.tolist() for a in post1], [['foo', 'bar'], [3, -1], [True, False]] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_quotechar_a(self) -> None: - msg = ['a,3,True', 'b,-1,False'] + msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(TypeError): _ = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_ALL, quotechar='') @@ -918,26 +1134,28 @@ def test_delimited_to_arrays_quotechar_a(self) -> None: _ = delimited_to_arrays(msg, axis=1, quoting=csv.QUOTE_NONE, quotechar=None) - def test_delimited_to_arrays_quotechar_b(self) -> None: - msg = ['|foo|,|3|,|True|', '|bar|,|-1|,|False|'] + msg = ['|foo|,|3|,|True|', '|bar|,|-1|,|False|'] post1 = delimited_to_arrays(msg, axis=1) self.assertEqual([a.dtype.kind for a in post1], ['U', 'U', 'U']) def test_delimited_to_arrays_quotechar_c(self) -> None: - msg = ['|a|,|3|,|True|', '|b|,|-1|,|False|'] + msg = ['|a|,|3|,|True|', '|b|,|-1|,|False|'] post1 = delimited_to_arrays(msg, axis=1, quotechar='|') self.assertEqual([a.dtype.kind for a in post1], ['U', 'i', 'b']) - self.assertEqual([a.tolist() for a in post1], [['a', 'b'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], [['a', 'b'], [3, -1], [True, False]] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_doublequote_a(self) -> None: - msg = ['"f""oo",3,True', '"b""ar",-1,False'] + msg = ['"f""oo",3,True', '"b""ar",-1,False'] post1 = delimited_to_arrays(msg, axis=1, doublequote=True, quoting=csv.QUOTE_ALL) - self.assertEqual([a.tolist() for a in post1], - [['f"oo', 'b"ar'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], [['f"oo', 'b"ar'], [3, -1], [True, False]] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_strict_a(self) -> None: msg = ['"f"oo",3,True', '"b"ar",-1,False'] # will fail because , expected after a quite @@ -946,8 +1164,9 @@ def test_delimited_to_arrays_strict_a(self) -> None: # with stract False we drop only two quotes and keep the rest post1 = delimited_to_arrays(msg, axis=1, strict=False) - self.assertEqual([a.tolist() for a in post1], - [['foo"', 'bar"'], [3, -1], [True, False]]) + self.assertEqual( + [a.tolist() for a in post1], [['foo"', 'bar"'], [3, -1], [True, False]] + ) def test_delimited_to_arrays_strict_b(self) -> None: msg = ['a,3,True', 'b,-1,False', '', ''] @@ -967,14 +1186,12 @@ def test_delimited_to_arrays_strict_c(self) -> None: # NOTE: the empty string is being converted to 0... not sure that is correct self.assertEqual(post1[1].tolist(), [3, -1, 0]) - def test_delimited_to_arrays_strict_d(self) -> None: msg = ['a,3,True', 'b,-1,False,,', 'c,'] post1 = delimited_to_arrays(msg, axis=1, strict=True) self.assertEqual([len(a) for a in post1], [3, 3, 2, 1, 1]) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_axis_a(self) -> None: msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(ValueError): @@ -986,8 +1203,7 @@ def test_delimited_to_arrays_axis_a(self) -> None: with self.assertRaises(TypeError): _ = delimited_to_arrays(msg, axis=None) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_line_select_a(self) -> None: msg = ['a,3,True', 'b,-1,False'] with self.assertRaises(TypeError): @@ -1006,21 +1222,21 @@ def test_delimited_to_arrays_line_select_c(self) -> None: self.assertEqual([x.tolist() for x in post1], [[False, True]]) def test_delimited_to_arrays_line_select_d(self) -> None: - msg = ['a,3,True,c', 'b,-1,False,d'] + msg = ['a,3,True,c', 'b,-1,False,d'] post1 = delimited_to_arrays(msg, axis=1, line_select=lambda i: i in (0, 2)) self.assertEqual([x.tolist() for x in post1], [['a', 'b'], [True, False]]) def test_delimited_to_arrays_line_select_e(self) -> None: - msg = ['a,3,True,c', 'b,-1,False,d'] + msg = ['a,3,True,c', 'b,-1,False,d'] post1 = delimited_to_arrays(msg, axis=1, line_select=lambda i: i == 3) self.assertEqual([x.tolist() for x in post1], [['c', 'd']]) def test_delimited_to_arrays_line_select_f(self) -> None: - msg = ['a,3,True,c', 'b,-1,False,d'] + msg = ['a,3,True,c', 'b,-1,False,d'] post1 = delimited_to_arrays(msg, axis=1, line_select=lambda i: False) self.assertEqual([x.tolist() for x in post1], []) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_thousandschar_a(self) -> None: msg = [ '1.000;4', @@ -1028,12 +1244,13 @@ def test_delimited_to_arrays_thousandschar_a(self) -> None: '4.000;6.000', ] with self.assertRaises(TypeError): - _ = delimited_to_arrays(msg, - axis=1, - dtypes=lambda i: int, - delimiter=';', - thousandschar='foo', - ) + _ = delimited_to_arrays( + msg, + axis=1, + dtypes=lambda i: int, + delimiter=';', + thousandschar='foo', + ) def test_delimited_to_arrays_thousandschar_b(self) -> None: @@ -1042,17 +1259,18 @@ def test_delimited_to_arrays_thousandschar_b(self) -> None: '2.000;5.000', '4.000;6.000.000', ] - post1 = delimited_to_arrays(msg, - axis=1, - dtypes=lambda i: int, - delimiter=';', - thousandschar='.', - ) - self.assertEqual([x.tolist() for x in post1], - [[1000, 2000, 4000], [4, 5000, 6000000]]) - + post1 = delimited_to_arrays( + msg, + axis=1, + dtypes=lambda i: int, + delimiter=';', + thousandschar='.', + ) + self.assertEqual( + [x.tolist() for x in post1], [[1000, 2000, 4000], [4, 5000, 6000000]] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_decimalchar_a(self) -> None: msg = [ '1.000;4', @@ -1060,12 +1278,13 @@ def test_delimited_to_arrays_decimalchar_a(self) -> None: '4.000;6.000', ] with self.assertRaises(TypeError): - _ = delimited_to_arrays(msg, - axis=1, - dtypes=lambda i: int, - delimiter=';', - decimalchar='foo', - ) + _ = delimited_to_arrays( + msg, + axis=1, + dtypes=lambda i: int, + delimiter=';', + decimalchar='foo', + ) def test_delimited_to_arrays_decimalchar_b(self) -> None: @@ -1074,18 +1293,19 @@ def test_delimited_to_arrays_decimalchar_b(self) -> None: '2.000;5,055', '4.000;6.000,155', ] - post1 = delimited_to_arrays(msg, - axis=1, - dtypes=(int, float).__getitem__, - delimiter=';', - decimalchar=',', - thousandschar='.', - ) - self.assertEqual([x.tolist() for x in post1], - [[1000, 2000, 4000], [4.0, 5.055, 6000.155]]) - + post1 = delimited_to_arrays( + msg, + axis=1, + dtypes=(int, float).__getitem__, + delimiter=';', + decimalchar=',', + thousandschar='.', + ) + self.assertEqual( + [x.tolist() for x in post1], [[1000, 2000, 4000], [4.0, 5.055, 6000.155]] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_file_like_a(self) -> None: def records(): msg = [ @@ -1095,23 +1315,23 @@ def records(): yield from msg with self.assertRaises(TypeError): - _ = delimited_to_arrays(records, - axis=1, - delimiter=';', - ) + _ = delimited_to_arrays( + records, + axis=1, + delimiter=';', + ) def test_delimited_to_arrays_file_like_b(self) -> None: with self.assertRaises(TypeError): - _ = delimited_to_arrays(3, - axis=1, - delimiter=';', - dtypes=lambda x: int, - ) - - + _ = delimited_to_arrays( + 3, + axis=1, + delimiter=';', + dtypes=lambda x: int, + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_delimited_to_arrays_compare_int_a(self) -> None: # genfromtxt might translate an empty field to -1 or 0 @@ -1146,6 +1366,5 @@ def test_delimited_to_arrays_compare_float_a(self) -> None: self.assertEqual([a.tolist() for a in post3], [[1.8, 3.1], ['', '']]) - if __name__ == '__main__': unittest.main() diff --git a/test/test_delimited_to_arrays_integration.py b/test/test_delimited_to_arrays_integration.py index 57b9cc02..b894b262 100644 --- a/test/test_delimited_to_arrays_integration.py +++ b/test/test_delimited_to_arrays_integration.py @@ -8,6 +8,7 @@ # NOTE: this is not implemented as an automated test as it downloads data on the fly + class DelimitedSource(tp.NamedTuple): url: str delimiter: str @@ -15,16 +16,37 @@ class DelimitedSource(tp.NamedTuple): expected_dtypes: tp.Sequence[str] expected_row_count: int + SOURCES = { 'noa-1': DelimitedSource( url='https://www.ndbc.noaa.gov/view_text_file.php?filename=46222h2018.txt.gz&dir=data/historical/stdmet/', delimiter=' ', skip_header=2, - expected_dtypes=['i','i','i','i','i','i','f','f','f','f','f','i','f','f','f','f','f','f'], + expected_dtypes=[ + 'i', + 'i', + 'i', + 'i', + 'i', + 'i', + 'f', + 'f', + 'f', + 'f', + 'f', + 'i', + 'f', + 'f', + 'f', + 'f', + 'f', + 'f', + ], expected_row_count=16824, - ) + ) } + def download_and_split(fp: str) -> tp.Iterable[str]: fn = hashlib.sha224(bytes(fp, encoding='utf8')).hexdigest() + '.txt' if os.path.exists('/tmp'): @@ -37,7 +59,7 @@ def download_and_split(fp: str) -> tp.Iterable[str]: with open(fp_destination) as f: return f.readlines() - with request.urlopen(fp) as response: #pragma: no cover + with request.urlopen(fp) as response: # pragma: no cover contents = response.read().decode('utf-8') if fp_destination: print(f'writing {fp_destination}') @@ -52,10 +74,9 @@ def process_sources(): for _ in range(ds.skip_header): next(lines) print(label) - post = delimited_to_arrays(lines, - delimiter=ds.delimiter, - skipinitialspace=True, - axis=1) + post = delimited_to_arrays( + lines, delimiter=ds.delimiter, skipinitialspace=True, axis=1 + ) assert [a.dtype.kind for a in post] == ds.expected_dtypes assert all(len(a) == ds.expected_row_count for a in post) diff --git a/test/test_delimited_to_arrays_property.py b/test/test_delimited_to_arrays_property.py index 465ae244..feeafb5b 100644 --- a/test/test_delimited_to_arrays_property.py +++ b/test/test_delimited_to_arrays_property.py @@ -10,39 +10,40 @@ class TestUnit(unittest.TestCase): - - #--------------------------------------------------------------------------- - - @given(st.lists(st.integers(min_value=-9223372036854775808, max_value=9223372036854775807), min_size=1, max_size=40)) + # --------------------------------------------------------------------------- + + @given( + st.lists( + st.integers(min_value=-9223372036854775808, max_value=9223372036854775807), + min_size=1, + max_size=40, + ) + ) def test_delimited_to_arrays_parse_a(self, v) -> None: msg = [f'{x},{x}' for x in v] post = delimited_to_arrays(msg, dtypes=None, axis=1) - self.assertEqual([a.dtype.kind for a in post], - ['i', 'i']) + self.assertEqual([a.dtype.kind for a in post], ['i', 'i']) self.assertEqual(post[0].tolist(), v) @given(st.lists(st.booleans(), min_size=1, max_size=40)) def test_delimited_to_arrays_parse_b(self, v) -> None: msg = [f'{x},{x}' for x in v] post = delimited_to_arrays(msg, dtypes=None, axis=1) - self.assertEqual([a.dtype.kind for a in post], - ['b', 'b']) + self.assertEqual([a.dtype.kind for a in post], ['b', 'b']) self.assertEqual(post[0].tolist(), v) @given(st.lists(st.floats(), min_size=1, max_size=40)) def test_delimited_to_arrays_parse_c(self, v) -> None: msg = [f'{x},{x}' for x in v] post = delimited_to_arrays(msg, dtypes=None, axis=1) - self.assertEqual([a.dtype.kind for a in post], - ['f', 'f']) + self.assertEqual([a.dtype.kind for a in post], ['f', 'f']) # need to handle NaNs @given(st.lists(st.floats(allow_nan=False), min_size=1, max_size=40)) def test_delimited_to_arrays_parse_d(self, v) -> None: msg = [f'{x},{x}' for x in v] post = delimited_to_arrays(msg, dtypes=None, axis=1) - self.assertEqual([a.dtype.kind for a in post], - ['f', 'f']) + self.assertEqual([a.dtype.kind for a in post], ['f', 'f']) # no NaNs self.assertTrue(np.allclose(post[0], v, equal_nan=True)) @@ -50,8 +51,7 @@ def test_delimited_to_arrays_parse_d(self, v) -> None: def test_delimited_to_arrays_parse_e(self, v) -> None: msg = [f'{x},{x}' for x in v] post = delimited_to_arrays(msg, dtypes=None, axis=1) - self.assertEqual([a.dtype.kind for a in post], - ['c', 'c']) + self.assertEqual([a.dtype.kind for a in post], ['c', 'c']) if __name__ == '__main__': diff --git a/test/test_nonzero_1d.py b/test/test_nonzero_1d.py index ba5cd5ae..f602f6a3 100644 --- a/test/test_nonzero_1d.py +++ b/test/test_nonzero_1d.py @@ -3,25 +3,16 @@ from arraykit import nonzero_1d -class TestUnit(unittest.TestCase): +class TestUnit(unittest.TestCase): def test_nonzero_1d_a1(self) -> None: - self.assertEqual( - nonzero_1d(np.array([], dtype=bool)).tolist(), - [] - ) + self.assertEqual(nonzero_1d(np.array([], dtype=bool)).tolist(), []) def test_nonzero_1d_a2(self) -> None: - self.assertEqual( - nonzero_1d(np.array([False], dtype=bool)).tolist(), - [] - ) + self.assertEqual(nonzero_1d(np.array([False], dtype=bool)).tolist(), []) def test_nonzero_1d_a3(self) -> None: - self.assertEqual( - nonzero_1d(np.array([True], dtype=bool)).tolist(), - [0] - ) + self.assertEqual(nonzero_1d(np.array([True], dtype=bool)).tolist(), [0]) def test_nonzero_1d_a4(self) -> None: with self.assertRaises(ValueError): @@ -34,29 +25,21 @@ def test_nonzero_1d_a5(self) -> None: with self.assertRaises(ValueError): nonzero_1d(np.arange(10).reshape(5, 2).astype(bool)) - def test_nonzero_1d_b1(self) -> None: self.assertEqual( - nonzero_1d(np.array([False, True, True, True])).tolist(), - [1, 2, 3] + nonzero_1d(np.array([False, True, True, True])).tolist(), [1, 2, 3] ) self.assertEqual( - nonzero_1d(np.array([False, True, False, True])).tolist(), - [1, 3] - ) - self.assertEqual( - nonzero_1d(np.array([False, False, False, False])).tolist(), - [] + nonzero_1d(np.array([False, True, False, True])).tolist(), [1, 3] ) + self.assertEqual(nonzero_1d(np.array([False, False, False, False])).tolist(), []) def test_nonzero_1d_b2(self) -> None: self.assertEqual( - nonzero_1d(np.array([False, False, False, False, True])).tolist(), - [4] + nonzero_1d(np.array([False, False, False, False, True])).tolist(), [4] ) self.assertEqual( - nonzero_1d(np.array([True, False, False, False, False])).tolist(), - [0] + nonzero_1d(np.array([True, False, False, False, False])).tolist(), [0] ) def test_nonzero_1d_c(self) -> None: @@ -75,7 +58,6 @@ def test_nonzero_1d_d(self) -> None: a1[0] = True self.assertEqual(nonzero_1d(a1).tolist(), [0, 999, 99999]) - def test_nonzero_1d_e(self) -> None: a1 = np.full(10_000_000, False) a1[9_999_999] = True diff --git a/test/test_nonzero_1d_property.py b/test/test_nonzero_1d_property.py index 7238ecbb..fac608d9 100644 --- a/test/test_nonzero_1d_property.py +++ b/test/test_nonzero_1d_property.py @@ -9,24 +9,21 @@ class TestUnit(unittest.TestCase): - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- @given(st.lists(st.booleans())) def test_nonzero_1d_a(self, v) -> None: array = np.array(v, dtype=bool) post1 = nonzero_1d(array) - post2, = np.nonzero(array) # unpack tuple + (post2,) = np.nonzero(array) # unpack tuple self.assertEqual(post1.tolist(), post2.tolist()) @given(arrays(np.dtype(np.bool_), st.integers(min_value=0, max_value=10_000))) def test_nonzero_1d_b(self, array) -> None: post1 = nonzero_1d(array) - post2, = np.nonzero(array) # unpack tuple + (post2,) = np.nonzero(array) # unpack tuple self.assertEqual(post1.tolist(), post2.tolist()) - - if __name__ == '__main__': unittest.main() diff --git a/test/test_objectable.py b/test/test_objectable.py index 9d617632..1ebeb624 100644 --- a/test/test_objectable.py +++ b/test/test_objectable.py @@ -5,8 +5,8 @@ from arraykit import is_objectable_dt64 from arraykit import is_objectable -class TestUnit(unittest.TestCase): +class TestUnit(unittest.TestCase): def test_is_objectable_a1(self) -> None: a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) self.assertTrue(is_objectable(a1)) @@ -31,8 +31,7 @@ def test_is_objectable_e(self) -> None: a1 = np.array(['b', None, False], dtype=object) self.assertTrue(is_objectable(a1)) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_is_objectable_dt64_a1(self) -> None: a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) @@ -46,13 +45,11 @@ def test_is_objectable_dt64_a3(self) -> None: a1 = np.array(['2022-01-04', '1954-04-12', '', ''], dtype=np.datetime64) self.assertTrue(is_objectable_dt64(a1)) - def test_is_objectable_dt64_b(self) -> None: # years are nevery objectable a1 = np.array(['2022', '2023'], dtype=np.datetime64) self.assertFalse(is_objectable_dt64(a1)) - def test_is_objectable_dt64_c(self) -> None: a1 = np.array(['-120-01-01', '2023-04-05'], dtype='datetime64[m]') self.assertFalse(is_objectable_dt64(a1)) @@ -61,8 +58,6 @@ def test_is_objectable_dt64_d(self) -> None: a1 = np.array(['2024-01-01', '2023-04-05', '10000-01-01'], dtype='datetime64[s]') self.assertFalse(is_objectable_dt64(a1)) - def test_is_objectable_dt64_e(self) -> None: a1 = np.array(['2024-01-01', '2023-04-05'], dtype='datetime64[ns]') self.assertFalse(is_objectable_dt64(a1)) - diff --git a/test/test_pyi.py b/test/test_pyi.py index 8f2bcd31..171dc8d2 100644 --- a/test/test_pyi.py +++ b/test/test_pyi.py @@ -8,6 +8,7 @@ import arraykit as ak + class Interface(tp.NamedTuple): functions: tp.List[str] classes: tp.Dict[str, tp.List[str]] @@ -25,14 +26,17 @@ def _valid_name(name: str) -> bool: @classmethod def from_module(cls, module): functions: tp.List[str] = [] - classes: tp.Dict[str: tp.List[str]] = {} + classes: tp.Dict[str : tp.List[str]] = {} for name in dir(module): if not cls._valid_name(name): continue obj = getattr(module, name) - if isinstance(obj, type): # a class - if name in (ak.ErrorInitTypeBlocks.__name__, ak.NonUniqueError.__name__): + if isinstance(obj, type): # a class + if name in ( + ak.ErrorInitTypeBlocks.__name__, + ak.NonUniqueError.__name__, + ): # skip as there is Python version variability continue classes[name] = [] @@ -49,7 +53,6 @@ def from_module(cls, module): class TestUnit(unittest.TestCase): - # @unittest.skip('not sure if pyi is in right location') def test_interface(self) -> None: @@ -72,8 +75,10 @@ def test_interface(self) -> None: ak_class = ak_content.classes[name] pyi_class = pyi_content.classes[name] - if '__hash__' in ak_class: ak_class.remove('__hash__') - if '__hash__' in pyi_class: pyi_class.remove('__hash__') + if '__hash__' in ak_class: + ak_class.remove('__hash__') + if '__hash__' in pyi_class: + pyi_class.remove('__hash__') self.assertEqual(ak_class, pyi_class) diff --git a/test/test_split_after_count.py b/test/test_split_after_count.py index 9cb41c94..294d4ae0 100644 --- a/test/test_split_after_count.py +++ b/test/test_split_after_count.py @@ -1,13 +1,11 @@ - import unittest import csv from arraykit import split_after_count -class TestUnit(unittest.TestCase): - - #--------------------------------------------------------------------------- +class TestUnit(unittest.TestCase): + # --------------------------------------------------------------------------- def test_split_after_count_a(self) -> None: post = split_after_count('a,b,c,d,e', delimiter=',', count=2) self.assertEqual(post[0], 'a,b') @@ -47,12 +45,11 @@ def test_split_after_count_i(self) -> None: post = split_after_count(',,,,,,,', delimiter=',', count=4) self.assertEqual(post, (',,,', ',,,')) - def test_split_after_count_j(self) -> None: post = split_after_count(',xxxxxxxxxxxxxxx,,yyyyyyy,,,,', delimiter=',', count=4) self.assertEqual(post, (',xxxxxxxxxxxxxxx,,yyyyyyy', ',,,')) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_split_after_count_exception_a(self) -> None: with self.assertRaises(ValueError): post = split_after_count(3, delimiter=',', count=2) @@ -73,7 +70,7 @@ def test_split_after_count_exception_e(self) -> None: with self.assertRaises(TypeError): post = split_after_count('a,b,c', quoting='234', count=2) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_split_after_count_escapechar_a(self) -> None: post = split_after_count('a,b/,c,d,e', escapechar='/', count=2) @@ -87,7 +84,7 @@ def test_split_after_count_escapechar_c(self) -> None: post = split_after_count('a,b///,c,d,e', escapechar='/', count=2) self.assertEqual(post, ('a,b///,c', 'd,e')) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_split_after_count_quotechar_a(self) -> None: post = split_after_count('a,b,"c,d",e', quotechar='"', count=3) @@ -97,8 +94,7 @@ def test_split_after_count_quotechar_b(self) -> None: post = split_after_count("a,b,'c,d',e", quotechar="'", count=3) self.assertEqual(post, ("a,b,'c,d'", 'e')) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_split_after_count_quoting_a(self) -> None: post = split_after_count('a,b,"c,d",e', quoting=csv.QUOTE_NONE, count=3) @@ -108,7 +104,7 @@ def test_split_after_count_quoting_b(self) -> None: post = split_after_count('a,b,"c,d",e', quoting=csv.QUOTE_ALL, count=3) self.assertEqual(post, ('a,b,"c,d"', 'e')) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_split_after_count_doublequote_a(self) -> None: post = split_after_count('a,b,"c,"",d",e', doublequote=True, count=3) @@ -119,7 +115,5 @@ def test_split_after_count_doublequote_b(self) -> None: self.assertEqual(post, ('a,b,"c,""', 'd",e')) - - if __name__ == '__main__': unittest.main() diff --git a/test/test_tri_map.py b/test/test_tri_map.py index e7008163..e1708722 100644 --- a/test/test_tri_map.py +++ b/test/test_tri_map.py @@ -8,7 +8,6 @@ class TestUnit(unittest.TestCase): - def test_tri_map_init_a(self) -> None: with self.assertRaises(TypeError): tm1 = TriMap() @@ -18,7 +17,10 @@ def test_tri_map_init_a(self) -> None: def test_tri_map_repr_a(self) -> None: tm = TriMap(10_000, 20_000) - self.assertEqual(str(tm), '') + self.assertEqual( + str(tm), + '', + ) def test_tri_map_repr_b(self) -> None: tm = TriMap(6, 6) @@ -29,8 +31,10 @@ def test_tri_map_repr_b(self) -> None: tm.register_one(4, -1) tm.register_one(5, -1) tm.finalize() - self.assertEqual(str(tm), '') - + self.assertEqual( + str(tm), + '', + ) def test_tri_map_finalize_a(self) -> None: @@ -39,13 +43,14 @@ def test_tri_map_finalize_a(self) -> None: with self.assertRaises(RuntimeError): tm.finalize() - def test_tri_map_register_one_a(self) -> None: tm = TriMap(500, 200) tm.register_one(3, 100) with self.assertRaises(TypeError): - tm.register_one(3,) + tm.register_one( + 3, + ) with self.assertRaises(TypeError): tm.register_one(3, 'a') @@ -61,7 +66,10 @@ def test_tri_map_register_one_b(self) -> None: for i in range(2000): tm.register_one(i, i) tm.finalize() - self.assertEqual(repr(tm), '') + self.assertEqual( + repr(tm), + '', + ) def test_tri_map_register_one_c(self) -> None: tm = TriMap(20, 30) @@ -78,7 +86,6 @@ def test_tri_map_register_one_c(self) -> None: tm.finalize() self.assertTrue(tm.is_many()) - def test_tri_map_src_no_fill_a(self) -> None: tm = TriMap(3, 3) tm.register_one(0, 0) @@ -95,24 +102,29 @@ def test_tri_map_register_unmatched_dst_a(self) -> None: tm.register_one(2, 2) tm.register_unmatched_dst() - self.assertEqual(repr(tm), '') + self.assertEqual( + repr(tm), + '', + ) tm.finalize() - self.assertEqual(repr(tm), '') - + self.assertEqual( + repr(tm), + '', + ) def test_tri_map_register_many_a(self) -> None: tm = TriMap(100, 50) - tm.register_many(3, np.array([2,5,8], dtype=np.int64)) + tm.register_many(3, np.array([2, 5, 8], dtype=np.int64)) with self.assertRaises(TypeError): - tm.register_many("foo", np.array([2,5,8])) + tm.register_many('foo', np.array([2, 5, 8])) with self.assertRaises(TypeError): tm.register_many(3, [3, 2]) with self.assertRaises(ValueError): - tm.register_many(3, np.array([2,5,8], dtype=np.int32)) + tm.register_many(3, np.array([2, 5, 8], dtype=np.int32)) def test_tri_map_register_many_b(self) -> None: tm = TriMap(100, 50) @@ -123,23 +135,32 @@ def test_tri_map_register_many_c(self) -> None: tm = TriMap(100, 50) tm.register_many(3, np.array([2, 5, 8], dtype=np.int64)) tm.finalize() - self.assertEqual(repr(tm), '') + self.assertEqual( + repr(tm), + '', + ) def test_tri_map_register_many_d1(self) -> None: tm = TriMap(100, 50) for i in range(100): tm.register_many(i, np.array([3, 20], dtype=np.int64)) tm.finalize() - self.assertEqual(repr(tm), '') + self.assertEqual( + repr(tm), + '', + ) def test_tri_map_register_many_d2(self) -> None: tm = TriMap(100, 50) for i in range(100): tm.register_many(i, np.array([3, 20], dtype=np.int64)) tm.finalize() - self.assertEqual(repr(tm), '') + self.assertEqual( + repr(tm), + '', + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_tri_map_map_src_no_fill_a(self) -> None: src = np.array([10, 20, 30, 40], dtype=np.int64) @@ -217,8 +238,7 @@ def test_tri_map_map_src_no_fill_d(self) -> None: self.assertFalse(post.flags.writeable) self.assertEqual(post.tolist(), [None, None, 'bbb', 'bbb']) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_tri_map_map_src_fill_a(self) -> None: src = np.array([10, 20, 30, 40], dtype=np.int64) @@ -236,7 +256,6 @@ def test_tri_map_map_src_fill_a(self) -> None: self.assertFalse(post.flags.writeable) self.assertEqual(post.tolist(), [10, 20, 30, 40, -1, -1]) - def test_tri_map_map_src_fill_b(self) -> None: src = np.array(['aa', 'bbbbb', 'ccc', 'dddd']) @@ -265,9 +284,11 @@ def test_tri_map_map_src_fill_c(self) -> None: post = tm.map_src_fill(src, 'na', np.dtype(str)) self.assertFalse(post.flags.writeable) - self.assertEqual(post.tolist(), ['aa', None, False, 300000000000000000000, 'na', 'na']) + self.assertEqual( + post.tolist(), ['aa', None, False, 300000000000000000000, 'na', 'na'] + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_tri_map_map_a(self) -> None: src = np.array(['a', 'bbb', 'cc', 'dddd']) @@ -290,7 +311,6 @@ def test_tri_map_map_a(self) -> None: self.assertFalse(post_dst.flags.writeable) self.assertEqual(post_dst.tolist(), ['a', 'a', 'a', '', 'cc', 'cc', '']) - def test_tri_map_map_b(self) -> None: src = np.array(['a', 'bbb', 'cc', 'dddd', 'a']) dst = np.array(['cc', 'dddd', 'a', 'bbb', 'cc']) @@ -328,13 +348,12 @@ def test_tri_map_map_c(self) -> None: post_src = tm.map_src_no_fill(src) del src self.assertFalse(post_src.flags.writeable) - self.assertEqual(post_src.tolist(), [0, 200, 300, 300, 400, 0]) + self.assertEqual(post_src.tolist(), [0, 200, 300, 300, 400, 0]) post_dst = tm.map_dst_no_fill(dst) del dst self.assertFalse(post_dst.flags.writeable) - self.assertEqual(post_dst.tolist(), [0, 200, 300, 300, 400, 0]) - + self.assertEqual(post_dst.tolist(), [0, 200, 300, 300, 400, 0]) def test_tri_map_map_d(self) -> None: src = np.array([0, 200, 300, 300, 0], dtype=np.int64) @@ -356,8 +375,6 @@ def test_tri_map_map_d(self) -> None: self.assertFalse(post_dst.flags.writeable) self.assertEqual(post_dst.tolist(), [200, 200, 300, 300, 300, 300, 300, 300]) - - def test_tri_map_map_e(self) -> None: src = np.array([0, 200, 300, 5, 0], dtype=np.int64) dst = np.array([-1, 200, 300, 200, 300, -1, -1], dtype=np.int64) @@ -380,8 +397,9 @@ def test_tri_map_map_e(self) -> None: post_dst = tm.map_dst_fill(dst, -20, np.dtype(np.int64)) del dst self.assertFalse(post_dst.flags.writeable) - self.assertEqual(post_dst.tolist(), [-20, 200, 200, 300, 300, -20, -20, -1, -1, -1]) - + self.assertEqual( + post_dst.tolist(), [-20, 200, 200, 300, 300, -20, -20, -1, -1, -1] + ) def test_tri_map_map_object_a(self) -> None: src = np.array([0, 200, 300], dtype=np.int64) @@ -405,7 +423,6 @@ def test_tri_map_map_object_a(self) -> None: self.assertFalse(post_dst.flags.writeable) self.assertEqual(post_dst.tolist(), [None, 200, None, -1, 400]) - def test_tri_map_map_object_b(self) -> None: src = np.array([0, 20000, 300], dtype=np.int64) dst = np.array([-1, 20000, 20000, 20000], dtype=np.int64) @@ -448,7 +465,6 @@ def test_tri_map_map_object_c(self) -> None: post_dst = tm.map_dst_fill(dst, None, np.dtype(np.object_)) self.assertEqual(post_dst.tolist(), [None, False, False, False, None]) - def test_tri_map_map_bool_a(self) -> None: src = np.array([True, False, True], dtype=np.bool_) dst = np.array([False, False, False], dtype=np.bool_) @@ -466,7 +482,6 @@ def test_tri_map_map_bool_a(self) -> None: post_dst = tm.map_dst_fill(dst, False, np.dtype(np.bool_)) self.assertEqual(post_dst.tolist(), [False, False, False, False, False]) - def test_tri_map_map_int_a(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.int32) dst = np.array([-1, 20, 20, 8], dtype=np.int32) @@ -513,7 +528,6 @@ def test_tri_map_map_int_b(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.int16)) self.assertEqual(post_dst.tolist(), [-10, 20, 20, 8, 8, -1]) - def test_tri_map_map_int_c(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.int8) dst = np.array([-1, 20, 20, 8], dtype=np.int8) @@ -537,7 +551,6 @@ def test_tri_map_map_int_c(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.int8)) self.assertEqual(post_dst.tolist(), [-10, 20, 20, 8, 8, -1]) - def test_tri_map_map_uint_a(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint16) dst = np.array([7, 20, 20, 8], dtype=np.uint16) @@ -561,7 +574,6 @@ def test_tri_map_map_uint_a(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.uint16)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_uint_b(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint8) dst = np.array([7, 20, 20, 8], dtype=np.uint8) @@ -585,7 +597,6 @@ def test_tri_map_map_uint_b(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.uint32)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_uint_c(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint32) dst = np.array([7, 20, 20, 8], dtype=np.uint32) @@ -609,7 +620,10 @@ def test_tri_map_map_uint_c(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.uint64)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - self.assertEqual(str(tm), '') + self.assertEqual( + str(tm), + '', + ) def test_tri_map_map_uint_d(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint8) @@ -634,7 +648,6 @@ def test_tri_map_map_uint_d(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.int16)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_uint_e(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint8) dst = np.array([7, 20, 20, 8], dtype=np.uint8) @@ -656,7 +669,6 @@ def test_tri_map_map_uint_e(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.int32)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_uint_f(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint16) dst = np.array([7, 20, 20, 8], dtype=np.uint16) @@ -720,7 +732,6 @@ def test_tri_map_map_uint_h(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.int64)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_uint_i(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint64) dst = np.array([7, 20, 20, 8], dtype=np.uint64) @@ -742,8 +753,6 @@ def test_tri_map_map_uint_i(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.float64)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - - def test_tri_map_map_float_a(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.uint8) dst = np.array([7, 20, 20, 8], dtype=np.uint8) @@ -765,7 +774,6 @@ def test_tri_map_map_float_a(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.float64)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_float_b(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.int8) dst = np.array([7, 20, 20, 8], dtype=np.int8) @@ -787,7 +795,6 @@ def test_tri_map_map_float_b(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.float64)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_float_c(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.float32) dst = np.array([7, 20, 20, 8], dtype=np.float32) @@ -914,7 +921,6 @@ def test_tri_map_map_float_h(self) -> None: self.assertEqual(post_dst.dtype, np.dtype(np.float16)) self.assertEqual(post_dst.tolist(), [17, 20, 20, 8, 8, 7]) - def test_tri_map_map_float_i(self) -> None: src = np.array([0, 20, 8, 8], dtype=np.float16) dst = np.array([7, 20, 20, 8], dtype=np.float16) @@ -990,7 +996,8 @@ def test_tri_map_map_bytes_a(self) -> None: post_dst = tm.map_dst_fill(dst, b'--', np.dtype(np.bytes_)) self.assertEqual(post_src.tolist(), [b'a', b'bbb', b'cc', b'--', b'--']) self.assertEqual(post_dst.tolist(), [b'--', b'--', b'cc', b'dddd', b'eee']) - #--------------------------------------------------------------------------- + + # --------------------------------------------------------------------------- def test_tri_map_map_unicode_a(self) -> None: src = np.array(['a', 'bbb', 'cc', 'dddd']) @@ -1043,13 +1050,19 @@ def test_tri_map_map_unicode_c(self) -> None: post_dst1 = tm.map_dst_fill(dst, 3, np.dtype(object)) self.assertEqual(post_dst1.tolist(), ['a', 'a', 'a', 3, 'cc', 'cc', 3]) - self.assertEqual(str(tm), '') + self.assertEqual( + str(tm), + '', + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_tri_map_map_dt64_a(self) -> None: src = np.array(['2022-01', '1954-03', '1743-09', '1988-12'], dtype=np.datetime64) - dst = np.array(['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], dtype=np.datetime64) + dst = np.array( + ['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], + dtype=np.datetime64, + ) tm = TriMap(len(src), len(dst)) tm.register_many(0, np.array([1, 2, 3], dtype=np.dtype(np.int64))) @@ -1062,18 +1075,42 @@ def test_tri_map_map_dt64_a(self) -> None: post_src = tm.map_src_fill(src, nat, np.dtype('datetime64')) self.assertEqual(post_src.dtype, np.dtype('datetime64[M]')) # string to permit NaN comparison - self.assertEqual([str(dt) for dt in post_src], - ['2022-01', '2022-01', '2022-01', '1954-03', '1743-09', '1743-09', '1988-12', 'NaT']) + self.assertEqual( + [str(dt) for dt in post_src], + [ + '2022-01', + '2022-01', + '2022-01', + '1954-03', + '1743-09', + '1743-09', + '1988-12', + 'NaT', + ], + ) post_dst = tm.map_dst_fill(dst, nat, np.dtype('datetime64')) self.assertEqual(post_dst.dtype, np.dtype('datetime64[M]')) - self.assertEqual([str(dt) for dt in post_dst], - ['2022-01', '2022-01', '2022-01', 'NaT', '1743-09', '1743-09', 'NaT', '2005-11']) - + self.assertEqual( + [str(dt) for dt in post_dst], + [ + '2022-01', + '2022-01', + '2022-01', + 'NaT', + '1743-09', + '1743-09', + 'NaT', + '2005-11', + ], + ) def test_tri_map_map_dt64_b(self) -> None: src = np.array(['2022-01', '1954-03', '1743-09', '1988-12'], dtype=np.datetime64) - dst = np.array(['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], dtype=np.datetime64) + dst = np.array( + ['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], + dtype=np.datetime64, + ) tm = TriMap(len(src), len(dst)) tm.register_many(0, np.array([1, 2, 3], dtype=np.dtype(np.int64))) @@ -1085,17 +1122,42 @@ def test_tri_map_map_dt64_b(self) -> None: post_src = tm.map_src_fill(src, '1999-12', np.dtype('datetime64')) self.assertEqual(post_src.dtype, np.dtype('datetime64[M]')) - self.assertEqual([str(dt) for dt in post_src], - ['2022-01', '2022-01', '2022-01', '1954-03', '1743-09', '1743-09', '1988-12', '1999-12']) + self.assertEqual( + [str(dt) for dt in post_src], + [ + '2022-01', + '2022-01', + '2022-01', + '1954-03', + '1743-09', + '1743-09', + '1988-12', + '1999-12', + ], + ) post_dst = tm.map_dst_fill(dst, '1999-12', np.dtype('datetime64')) self.assertEqual(post_dst.dtype, np.dtype('datetime64[M]')) - self.assertEqual([str(dt) for dt in post_dst], - ['2022-01', '2022-01', '2022-01', '1999-12', '1743-09', '1743-09', '1999-12', '2005-11']) + self.assertEqual( + [str(dt) for dt in post_dst], + [ + '2022-01', + '2022-01', + '2022-01', + '1999-12', + '1743-09', + '1743-09', + '1999-12', + '2005-11', + ], + ) def test_tri_map_map_dt64_c(self) -> None: src = np.array(['2022-01', '1954-03', '1743-09', '1988-12'], dtype=np.datetime64) - dst = np.array(['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], dtype=np.datetime64) + dst = np.array( + ['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], + dtype=np.datetime64, + ) tm = TriMap(len(src), len(dst)) tm.register_many(0, np.array([1, 2, 3], dtype=np.dtype(np.int64))) @@ -1108,17 +1170,42 @@ def test_tri_map_map_dt64_c(self) -> None: post_src = tm.map_src_fill(src, '1999', np.dtype('datetime64[Y]')) self.assertEqual(post_src.dtype, np.dtype('datetime64[M]')) # NOTE: the year dtype is "fit" within the year-mo by defaulting to the first month; we might not want to permit this - self.assertEqual([str(dt) for dt in post_src], - ['2022-01', '2022-01', '2022-01', '1954-03', '1743-09', '1743-09', '1988-12', '1999-01']) + self.assertEqual( + [str(dt) for dt in post_src], + [ + '2022-01', + '2022-01', + '2022-01', + '1954-03', + '1743-09', + '1743-09', + '1988-12', + '1999-01', + ], + ) post_dst = tm.map_dst_fill(dst, '1999', np.dtype('datetime64[Y]')) self.assertEqual(post_dst.dtype, np.dtype('datetime64[M]')) - self.assertEqual([str(dt) for dt in post_dst], - ['2022-01', '2022-01', '2022-01', '1999-01', '1743-09', '1743-09', '1999-01', '2005-11']) + self.assertEqual( + [str(dt) for dt in post_dst], + [ + '2022-01', + '2022-01', + '2022-01', + '1999-01', + '1743-09', + '1743-09', + '1999-01', + '2005-11', + ], + ) def test_tri_map_map_dt64_d(self) -> None: src = np.array(['2022-01', '1954-03', '1743-09', '1988-12'], dtype=np.datetime64) - dst = np.array(['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], dtype=np.datetime64) + dst = np.array( + ['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], + dtype=np.datetime64, + ) tm = TriMap(len(src), len(dst)) tm.register_many(0, np.array([1, 2, 3], dtype=np.dtype(np.int64))) @@ -1130,15 +1217,37 @@ def test_tri_map_map_dt64_d(self) -> None: post_src = tm.map_src_fill(src, '1999-09-09', np.dtype('datetime64[D]')) self.assertEqual(post_src.dtype, np.dtype('datetime64[D]')) - self.assertEqual([str(dt) for dt in post_src], - ['2022-01-01', '2022-01-01', '2022-01-01', '1954-03-01', '1743-09-01', '1743-09-01', '1988-12-01', '1999-09-09']) + self.assertEqual( + [str(dt) for dt in post_src], + [ + '2022-01-01', + '2022-01-01', + '2022-01-01', + '1954-03-01', + '1743-09-01', + '1743-09-01', + '1988-12-01', + '1999-09-09', + ], + ) post_dst = tm.map_dst_fill(dst, '1999-09-09', np.dtype('datetime64[D]')) self.assertEqual(post_dst.dtype, np.dtype('datetime64[D]')) - self.assertEqual([str(dt) for dt in post_dst], - ['2022-01-01', '2022-01-01', '2022-01-01', '1999-09-09', '1743-09-01', '1743-09-01', '1999-09-09', '2005-11-01']) - - #--------------------------------------------------------------------------- + self.assertEqual( + [str(dt) for dt in post_dst], + [ + '2022-01-01', + '2022-01-01', + '2022-01-01', + '1999-09-09', + '1743-09-01', + '1743-09-01', + '1999-09-09', + '2005-11-01', + ], + ) + + # --------------------------------------------------------------------------- def test_tri_map_merge_a(self) -> None: src = np.array([0, 200, 300, 400, 0], dtype=np.int64) @@ -1168,7 +1277,6 @@ def test_tri_map_merge_a(self) -> None: with self.assertRaises(TypeError): _ = tm.map_merge(src, dst.reshape(5, 1)) - def test_tri_map_merge_a(self) -> None: src = np.array([0, 200, 300, 400, 0], dtype=np.int64) dst = np.array([300, 400, 0, 200, 300, 50, 50], dtype=np.int64) @@ -1230,8 +1338,6 @@ def test_tri_map_merge_d(self) -> None: post = tm.map_merge(src, dst) self.assertEqual(post.tolist(), ['a', 'a', 'bbb', 'cc', 'cc', 'dddd', 'ee']) - - def test_tri_map_merge_e(self) -> None: src = np.array([None, False, -42, 'dddd'], dtype=object) dst = np.array([-42, None, None, 'ee', -42], dtype=object) @@ -1248,7 +1354,14 @@ def test_tri_map_merge_e(self) -> None: self.assertEqual(post.tolist(), [None, None, False, -42, -42, 'dddd', 'ee']) def test_tri_map_merge_f(self) -> None: - src = np.array([None, False, -42,], dtype=object) + src = np.array( + [ + None, + False, + -42, + ], + dtype=object, + ) dst = np.array([True, 'ee', 88], dtype=object) tm = TriMap(len(src), len(dst)) @@ -1262,7 +1375,14 @@ def test_tri_map_merge_f(self) -> None: self.assertEqual(post.tolist(), [None, False, -42, True, 'ee', 88]) def test_tri_map_merge_g(self) -> None: - src = np.array([None, False, -42,], dtype=object) + src = np.array( + [ + None, + False, + -42, + ], + dtype=object, + ) dst = np.array([None, False, -42, 'ee', 'ff'], dtype=object) tm = TriMap(len(src), len(dst)) @@ -1292,7 +1412,10 @@ def test_tri_map_merge_h(self) -> None: def test_tri_map_merge_i(self) -> None: src = np.array(['2022-01', '1954-03', '1743-09', '1988-12'], dtype=np.datetime64) - dst = np.array(['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], dtype=np.datetime64) + dst = np.array( + ['1743-09', '2022-01', '2022-01', '2022-01', '1743-09', '2005-11'], + dtype=np.datetime64, + ) tm = TriMap(len(src), len(dst)) tm.register_many(0, np.array([1, 2, 3], dtype=np.dtype(np.int64))) @@ -1303,13 +1426,16 @@ def test_tri_map_merge_i(self) -> None: tm.finalize() post = tm.map_merge(src, dst) - self.assertEqual(list(post), - [np.datetime64('2022-01'), - np.datetime64('2022-01'), - np.datetime64('2022-01'), - np.datetime64('1954-03'), - np.datetime64('1743-09'), - np.datetime64('1743-09'), - np.datetime64('1988-12'), - np.datetime64('2005-11')] - ) + self.assertEqual( + list(post), + [ + np.datetime64('2022-01'), + np.datetime64('2022-01'), + np.datetime64('2022-01'), + np.datetime64('1954-03'), + np.datetime64('1743-09'), + np.datetime64('1743-09'), + np.datetime64('1988-12'), + np.datetime64('2005-11'), + ], + ) diff --git a/test/test_type_discovery.py b/test/test_type_discovery.py index 1b46b9de..5511a9b8 100644 --- a/test/test_type_discovery.py +++ b/test/test_type_discovery.py @@ -1,7 +1,3 @@ - - - - import unittest import typing as tp from enum import Enum @@ -10,7 +6,7 @@ from hypothesis import given -_ = ''' +_ = """ Discovery options: @@ -43,27 +39,32 @@ if combined with False/True, will be interpreted as str Discover contiguous numeric, i.e., if contiguous sequence of digits, e, j, sign,decimal; then, after parse complete, look at e/j/decimal counts to determine numeric type -''' +""" # functions as needed in C implementation def is_digit(c: str) -> bool: - #define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u) + # define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u) return c.isdigit() + def is_space(c: str) -> bool: - #define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5)) + # define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5)) return c.isspace() + def is_sign(c: str) -> bool: return c == '+' or c == '-' + def is_paren_open(c: str) -> bool: return c == '(' + def is_paren_close(c: str) -> bool: return c == ')' + def is_decimal(c: str) -> bool: return c == '.' @@ -71,33 +72,43 @@ def is_decimal(c: str) -> bool: def is_a(c: str) -> bool: return c == 'a' or c == 'A' + def is_e(c: str) -> bool: return c == 'e' or c == 'E' + def is_f(c: str) -> bool: return c == 'f' or c == 'F' + def is_i(c: str) -> bool: return c == 'i' or c == 'I' + def is_j(c: str) -> bool: return c == 'j' or c == 'J' + def is_l(c: str) -> bool: return c == 'l' or c == 'L' + def is_n(c: str) -> bool: return c == 'n' or c == 'N' + def is_r(c: str) -> bool: return c == 'r' or c == 'R' + def is_s(c: str) -> bool: return c == 's' or c == 'S' + def is_t(c: str) -> bool: return c == 't' or c == 'T' + def is_u(c: str) -> bool: return c == 'u' or c == 'U' @@ -116,20 +127,17 @@ def resolve(cls, previous: 'TypeResolved', new: 'TypeResolved') -> None: if new is cls.IS_UNKNOWN: return cls.IS_STRING - if (previous is cls.IS_UNKNOWN - or previous is cls.IS_EMPTY): + if previous is cls.IS_UNKNOWN or previous is cls.IS_EMPTY: return new # a string with anything else is a string - if (previous is cls.IS_STRING - or new is cls.IS_STRING): + if previous is cls.IS_STRING or new is cls.IS_STRING: return cls.IS_STRING if previous is cls.IS_BOOL: - if (new is cls.IS_EMPTY - or new is cls.IS_BOOL): + if new is cls.IS_EMPTY or new is cls.IS_BOOL: return cls.IS_BOOL - else: # bool found with anything else except empty is a string + else: # bool found with anything else except empty is a string return cls.IS_STRING if new is cls.IS_BOOL: if previous is cls.IS_EMPTY: @@ -137,10 +145,8 @@ def resolve(cls, previous: 'TypeResolved', new: 'TypeResolved') -> None: else: return cls.IS_STRING - if previous is cls.IS_INT: - if (new is cls.IS_EMPTY - or new is cls.IS_INT): + if new is cls.IS_EMPTY or new is cls.IS_INT: return cls.IS_INT if new is cls.IS_FLOAT: return cls.IS_FLOAT @@ -148,9 +154,7 @@ def resolve(cls, previous: 'TypeResolved', new: 'TypeResolved') -> None: return cls.IS_COMPLEX if previous is cls.IS_FLOAT: - if (new is cls.IS_EMPTY - or new is cls.IS_INT - or new is cls.IS_FLOAT): + if new is cls.IS_EMPTY or new is cls.IS_INT or new is cls.IS_FLOAT: return cls.IS_FLOAT if new is cls.IS_COMPLEX: return cls.IS_COMPLEX @@ -164,10 +168,12 @@ def resolve(cls, previous: 'TypeResolved', new: 'TypeResolved') -> None: raise NotImplementedError(previous, new) + class TypeField: - ''' + """ Estimate the type of a field. This estimate can be based on character type counts. Some ordering considerations will be ignored for convenience; if downstream parsing fails, fallback will be to a string type anyway. - ''' + """ + def __init__(self) -> None: self.reset() self.parsed_line: TypeResolved = TypeResolved.IS_UNKNOWN @@ -180,7 +186,7 @@ def reset(self) -> None: self.contiguous_numeric = False # numeric symbols; values do not need to be greater than 8 - self.count_bool = 0 # signed, not greater than +/- 5 + self.count_bool = 0 # signed, not greater than +/- 5 self.count_sign = 0 self.count_e = 0 self.count_j = 0 @@ -194,7 +200,7 @@ def reset(self) -> None: self.last_sign_pos = -1 self.count_leading_space = 0 self.count_digit = 0 - self.count_notspace = 0 # non-space, non-paren + self.count_notspace = 0 # non-space, non-paren def process_char(self, c: str, pos: int) -> int: # position is postion needs to be dropping leading space @@ -204,7 +210,7 @@ def process_char(self, c: str, pos: int) -> int: if self.parsed_field != TypeResolved.IS_UNKNOWN: return 0 - # 32 to 57; 65 to 85; 97 to 117 inclusive + # 32 to 57; 65 to 85; 97 to 117 inclusive # if ord(c) < 32 or ord(c) > 117: # less than space, greater than u # self.parsed_field = TypeResolved.IS_STRING # return 0 @@ -230,7 +236,9 @@ def process_char(self, c: str, pos: int) -> int: self.count_leading_space += 1 space = True # open paren only permitted first non-space position - if (pos > 0 and not self.contiguous_leading_space) or self.count_paren_open > 1: + if ( + pos > 0 and not self.contiguous_leading_space + ) or self.count_paren_open > 1: self.parsed_field = TypeResolved.IS_STRING return 0 elif is_paren_close(c): @@ -261,7 +269,7 @@ def process_char(self, c: str, pos: int) -> int: elif is_decimal(c): self.count_decimal += 1 - if self.count_decimal > 2: # complex can have 2! + if self.count_decimal > 2: # complex can have 2! self.parsed_field = TypeResolved.IS_STRING return 0 numeric = True @@ -275,8 +283,7 @@ def process_char(self, c: str, pos: int) -> int: self.last_sign_pos = pos_field numeric = True - - elif is_e(c): # only character that is numeric and bool + elif is_e(c): # only character that is numeric and bool self.count_e += 1 if pos_field == 0 or self.count_e > 2: # true and false each only have one E, complex can have 2 @@ -291,8 +298,7 @@ def process_char(self, c: str, pos: int) -> int: return 0 numeric = True - - #----------------------------------------------------------------------- + # ----------------------------------------------------------------------- # print(f' pre: {c=} {pos=} {pos_field=} {numeric=} {self.previous_numeric=} {self.contiguous_numeric=}') if numeric: @@ -305,13 +311,12 @@ def process_char(self, c: str, pos: int) -> int: self.contiguous_numeric = False self.previous_numeric = True - else: # not numeric, could be space or notspace + else: # not numeric, could be space or notspace if self.contiguous_numeric and not space: self.contiguous_numeric = False self.previous_numeric = False - # evaluate character positions ----------------------------------------- if space or digit: return 1 @@ -380,13 +385,12 @@ def process_char(self, c: str, pos: int) -> int: # print(f'post: {c=} {pos=} {pos_field=} {numeric=} {self.previous_numeric=} {self.contiguous_numeric=} {self.last_sign_pos=} {self.count_nan=} {self.count_inf=} {self.count_notspace=}') - return 1 def resolve_field_type(self, count: int) -> None: - ''' + """ As process char may abort early, provide final evaluation full count - ''' + """ if count == 0: return TypeResolved.IS_EMPTY @@ -398,43 +402,47 @@ def resolve_field_type(self, count: int) -> None: if self.count_bool == -5 and self.count_notspace == 5: return TypeResolved.IS_BOOL - if self.contiguous_numeric: # must have digits + if self.contiguous_numeric: # must have digits # NOTE: have already handled cases with excessive counts if self.count_digit == 0: # can have contiguous numerics like +ej.- but no digits return TypeResolved.IS_STRING - if (self.count_j == 0 - and self.count_e == 0 - and self.count_decimal == 0 - and self.count_paren_open == 0 - and self.count_paren_close == 0 - and self.count_nan == 0 - and self.count_inf == 0): + if ( + self.count_j == 0 + and self.count_e == 0 + and self.count_decimal == 0 + and self.count_paren_open == 0 + and self.count_paren_close == 0 + and self.count_nan == 0 + and self.count_inf == 0 + ): return TypeResolved.IS_INT - if (self.count_j == 0 - and self.count_sign <= 2 - and self.count_paren_open == 0 - and self.count_paren_close == 0 - and (self.count_decimal == 1 or self.count_e == 1)): + if ( + self.count_j == 0 + and self.count_sign <= 2 + and self.count_paren_open == 0 + and self.count_paren_close == 0 + and (self.count_decimal == 1 or self.count_e == 1) + ): if self.count_sign > 1 and self.count_e == 0: # if more than one sign and no e, not a float return TypeResolved.IS_STRING return TypeResolved.IS_FLOAT if self.count_j == 1 and ( - (self.count_paren_open == 1 and self.count_paren_close == 1) - or (self.count_paren_open == 0 and self.count_paren_close == 0) - ): + (self.count_paren_open == 1 and self.count_paren_close == 1) + or (self.count_paren_open == 0 and self.count_paren_close == 0) + ): if self.count_sign > 2 + self.count_e: return TypeResolved.IS_STRING return TypeResolved.IS_COMPLEX # if only paren and digits, mark as complex if self.count_j == 0 and ( - (self.count_paren_open == 1 and self.count_paren_close == 1) - ): + self.count_paren_open == 1 and self.count_paren_close == 1 + ): if self.count_e > 1 or self.count_sign > 2: return TypeResolved.IS_STRING return TypeResolved.IS_COMPLEX @@ -442,44 +450,52 @@ def resolve_field_type(self, count: int) -> None: # not contiguous numeric, has inf or nan in some combination elif self.count_j == 0: # float nan and inf that might be signed - if self.count_nan == 3 and self.count_sign + self.count_nan == self.count_notspace: + if ( + self.count_nan == 3 + and self.count_sign + self.count_nan == self.count_notspace + ): return TypeResolved.IS_FLOAT - if self.count_inf == 3 and self.count_sign + self.count_inf == self.count_notspace: + if ( + self.count_inf == 3 + and self.count_sign + self.count_inf == self.count_notspace + ): return TypeResolved.IS_FLOAT elif self.count_j == 1: # special cases of complex that do not present as contiguous numeric because of inf/nan - if self.count_inf == 3 or self.count_inf == 6 and ( - self.count_sign + self.count_inf + 1 == self.count_notspace - ): + if ( + self.count_inf == 3 + or self.count_inf == 6 + and (self.count_sign + self.count_inf + 1 == self.count_notspace) + ): return TypeResolved.IS_COMPLEX - if self.count_nan == 3 or self.count_nan == 6 and ( - self.count_sign + self.count_nan + 1 == self.count_notspace - ): + if ( + self.count_nan == 3 + or self.count_nan == 6 + and (self.count_sign + self.count_nan + 1 == self.count_notspace) + ): return TypeResolved.IS_COMPLEX - # import ipdb; ipdb.set_trace() return TypeResolved.IS_STRING - def process_field(self, field: str) -> TypeResolved: # NOTE: return TypeResolved is not necessary - self.reset() # does not reset parsed_line + self.reset() # does not reset parsed_line pos = 0 continue_process = 1 for char in field: if continue_process: continue_process = self.process_char(char, pos) - pos += 1 # results in count + pos += 1 # results in count # must call after all chars processed, does not set self.resolved field rlt_new = self.resolve_field_type(pos) self.parsed_line = TypeResolved.resolve(self.parsed_line, rlt_new) # print(f'{self.parsed_line=}') - return self.parsed_line # returning this is just for testing + return self.parsed_line # returning this is just for testing def get_resolved(self) -> TypeResolved: if self.parsed_line is TypeResolved.IS_EMPTY: @@ -495,7 +511,6 @@ def process_line(self, fields: tp.Iterable[str]) -> TypeResolved: class TestUnit(unittest.TestCase): - def test_bool_a(self) -> None: self.assertEqual(TypeField().process_field(' true'), TypeResolved.IS_BOOL) self.assertEqual(TypeField().process_field('FALSE'), TypeResolved.IS_BOOL) @@ -509,12 +524,10 @@ def test_bool_a(self) -> None: def test_bool_b(self) -> None: self.assertEqual(TypeField().process_field(' true +'), TypeResolved.IS_STRING) - def test_str_a(self) -> None: self.assertEqual(TypeField().process_field('+++'), TypeResolved.IS_STRING) self.assertEqual(TypeField().process_field(' ee '), TypeResolved.IS_STRING) - @given(st.integers()) def test_int_property(self, v) -> None: self.assertEqual(TypeField().process_field(str(v)), TypeResolved.IS_INT) @@ -588,7 +601,6 @@ def test_float_known_false_positive(self) -> None: # NOTE: we mark this as float because we do not observe that a number must follow e; assume this will fail in float conversion self.assertEqual(TypeField().process_field('8e'), TypeResolved.IS_FLOAT) - @given(st.complex_numbers()) def test_complex_property(self, v) -> None: # print(v, TypeField().process_field(str(v))) @@ -608,7 +620,9 @@ def test_complex_b(self) -> None: self.assertEqual(TypeField().process_field('2.3-3.5j '), TypeResolved.IS_COMPLEX) self.assertEqual(TypeField().process_field('+23-35j '), TypeResolved.IS_COMPLEX) self.assertEqual(TypeField().process_field('+23-3.5j '), TypeResolved.IS_COMPLEX) - self.assertEqual(TypeField().process_field('-3e-10-3e-2j'), TypeResolved.IS_COMPLEX) + self.assertEqual( + TypeField().process_field('-3e-10-3e-2j'), TypeResolved.IS_COMPLEX + ) self.assertEqual(TypeField().process_field('+23-3.5j +'), TypeResolved.IS_STRING) @@ -652,30 +666,41 @@ def test_complex_j1(self) -> None: def test_complex_j2(self) -> None: self.assertEqual(TypeField().process_field('(-23e-10e)'), TypeResolved.IS_STRING) - def test_complex_k(self) -> None: - self.assertEqual(TypeField().process_field('(-23e-10j-34e-2)'), TypeResolved.IS_COMPLEX) - self.assertEqual(TypeField().process_field('(-23e-10j-34e-2+)'), TypeResolved.IS_STRING) - + self.assertEqual( + TypeField().process_field('(-23e-10j-34e-2)'), TypeResolved.IS_COMPLEX + ) + self.assertEqual( + TypeField().process_field('(-23e-10j-34e-2+)'), TypeResolved.IS_STRING + ) def test_complex_known_false_positive(self) -> None: # NOTE: genfromtxt identifies this as string as j component is in first position self.assertEqual(TypeField().process_field('23j-43'), TypeResolved.IS_COMPLEX) self.assertEqual(TypeField().process_field('+23-3.5j3'), TypeResolved.IS_COMPLEX) - - - def test_line_a(self) -> None: - self.assertEqual(TypeField().process_line(('25', '2.5', '')), TypeResolved.IS_FLOAT) - self.assertEqual(TypeField().process_line((' .1', '2.5', '')), TypeResolved.IS_FLOAT) + self.assertEqual( + TypeField().process_line(('25', '2.5', '')), TypeResolved.IS_FLOAT + ) + self.assertEqual( + TypeField().process_line((' .1', '2.5', '')), TypeResolved.IS_FLOAT + ) self.assertEqual(TypeField().process_line(('25', '', '')), TypeResolved.IS_INT) - self.assertEqual(TypeField().process_line(('25', '2.5', 'e')), TypeResolved.IS_STRING) + self.assertEqual( + TypeField().process_line(('25', '2.5', 'e')), TypeResolved.IS_STRING + ) def test_line_b(self) -> None: - self.assertEqual(TypeField().process_line((' true', ' false', 'FALSE')), TypeResolved.IS_BOOL) - self.assertEqual(TypeField().process_line((' true', ' false', 'FALSEq')), TypeResolved.IS_STRING) + self.assertEqual( + TypeField().process_line((' true', ' false', 'FALSE')), + TypeResolved.IS_BOOL, + ) + self.assertEqual( + TypeField().process_line((' true', ' false', 'FALSEq')), + TypeResolved.IS_STRING, + ) def test_line_c(self) -> None: self.assertEqual(TypeField().process_line(('3', '', '4')), TypeResolved.IS_INT) @@ -687,27 +712,34 @@ def test_line_c(self) -> None: def test_line_d(self) -> None: self.assertEqual(TypeField().process_line(('3', '', '4.')), TypeResolved.IS_FLOAT) - self.assertEqual(TypeField().process_line(('3', '', '4e3')), TypeResolved.IS_FLOAT) - self.assertEqual(TypeField().process_line(('3', '', '(4e3)')), TypeResolved.IS_COMPLEX) - - self.assertEqual(TypeField().process_line(('3', '', '(4e3)', 'True')), TypeResolved.IS_STRING) + self.assertEqual( + TypeField().process_line(('3', '', '4e3')), TypeResolved.IS_FLOAT + ) + self.assertEqual( + TypeField().process_line(('3', '', '(4e3)')), TypeResolved.IS_COMPLEX + ) + self.assertEqual( + TypeField().process_line(('3', '', '(4e3)', 'True')), TypeResolved.IS_STRING + ) def test_line_e(self) -> None: - self.assertEqual(TypeField().process_line(('foo', '', '', 'bar')), TypeResolved.IS_STRING) + self.assertEqual( + TypeField().process_line(('foo', '', '', 'bar')), TypeResolved.IS_STRING + ) - self.assertEqual(TypeField().process_line(('', '', '', 'bar')), TypeResolved.IS_STRING) + self.assertEqual( + TypeField().process_line(('', '', '', 'bar')), TypeResolved.IS_STRING + ) def test_line_f(self) -> None: # EMPTY is treated as False - self.assertEqual(TypeField().process_line(('', '', '', 'True')), TypeResolved.IS_BOOL) + self.assertEqual( + TypeField().process_line(('', '', '', 'True')), TypeResolved.IS_BOOL + ) self.assertEqual(TypeField().process_line(('True', '')), TypeResolved.IS_BOOL) - if __name__ == '__main__': unittest.main() - - - diff --git a/test/test_util.py b/test/test_util.py index 11d59f7f..cd47d950 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,8 +1,11 @@ import pytest import collections import datetime +import os +import tempfile import unittest import warnings +from io import BytesIO from io import StringIO import numpy as np # type: ignore # import pandas as pd # disable so as to compile 32 bit wheels for python 3.12 @@ -26,16 +29,20 @@ from arraykit import slice_to_unit from arraykit import array_to_tuple_array from arraykit import array_to_tuple_iter +from arraykit import write_array_to_file -from performance.reference.util import get_new_indexers_and_screen_ak as get_new_indexers_and_screen_full +from performance.reference.util import ( + get_new_indexers_and_screen_ak as get_new_indexers_and_screen_full, +) from arraykit import get_new_indexers_and_screen from performance.reference.util import mloc as mloc_ref -from performance.reference.util import slice_to_ascending_slice as slice_to_ascending_slice_ref +from performance.reference.util import ( + slice_to_ascending_slice as slice_to_ascending_slice_ref, +) class TestUnit(unittest.TestCase): - def test_mloc_a(self) -> None: a1 = np.arange(10) self.assertEqual(mloc(a1), mloc_ref(a1)) @@ -51,7 +58,7 @@ def test_resolve_dtype_a(self) -> None: a3 = np.array(['b', 'c', 'd']) a4 = np.array([2.3, 3.2]) a5 = np.array(['test', 'test again'], dtype='S') - a6 = np.array([2.3,5.4], dtype='float32') + a6 = np.array([2.3, 5.4], dtype='float32') self.assertEqual(resolve_dtype(a1.dtype, a1.dtype), a1.dtype) @@ -68,27 +75,21 @@ def test_resolve_dtype_a(self) -> None: def test_resolve_dtype_b(self) -> None: self.assertEqual( - resolve_dtype(np.array('a').dtype, np.array('aaa').dtype), - np.dtype(('U', 3)) - ) + resolve_dtype(np.array('a').dtype, np.array('aaa').dtype), + np.dtype(('U', 3)), + ) def test_resolve_dtype_c(self) -> None: - a1 = np.array(['2019-01', '2019-02'], dtype=np.datetime64) a2 = np.array(['2019-01-01', '2019-02-01'], dtype=np.datetime64) a3 = np.array([0, 1], dtype='datetime64[ns]') a4 = np.array([0, 1]) - self.assertEqual(str(resolve_dtype(a1.dtype, a2.dtype)), - 'datetime64[D]') + self.assertEqual(str(resolve_dtype(a1.dtype, a2.dtype)), 'datetime64[D]') self.assertEqual(resolve_dtype(a1.dtype, a3.dtype).kind, 'M') - self.assertEqual( - np.datetime_data(resolve_dtype(a1.dtype, a3.dtype)), - ('ns', 1)) - self.assertEqual(resolve_dtype(a1.dtype, a4.dtype), - np.dtype('O')) - + self.assertEqual(np.datetime_data(resolve_dtype(a1.dtype, a3.dtype)), ('ns', 1)) + self.assertEqual(resolve_dtype(a1.dtype, a4.dtype), np.dtype('O')) def test_resolve_dtype_d(self) -> None: dt1 = np.array(1).dtype @@ -101,8 +102,7 @@ def test_resolve_dtype_e(self) -> None: assert resolve_dtype(dt1, dt2) == np.dtype(object) assert resolve_dtype(dt1, dt1) == dt1 - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_resolve_dtype_iter_a(self) -> None: a1 = np.array([1, 2, 3]) @@ -110,7 +110,7 @@ def test_resolve_dtype_iter_a(self) -> None: a3 = np.array(['b', 'c', 'd']) a4 = np.array([2.3, 3.2]) a5 = np.array(['test', 'test again'], dtype='S') - a6 = np.array([2.3,5.4], dtype='float32') + a6 = np.array([2.3, 5.4], dtype='float32') self.assertEqual(resolve_dtype_iter((a1.dtype, a1.dtype)), a1.dtype) self.assertEqual(resolve_dtype_iter((a2.dtype, a2.dtype)), a2.dtype) @@ -124,8 +124,12 @@ def test_resolve_dtype_iter_a(self) -> None: self.assertEqual(resolve_dtype_iter((a1.dtype, a4.dtype, a6.dtype)), np.float64) # add in bool or str, goes to object - self.assertEqual(resolve_dtype_iter((a1.dtype, a4.dtype, a6.dtype, a2.dtype)), np.object_) - self.assertEqual(resolve_dtype_iter((a1.dtype, a4.dtype, a6.dtype, a5.dtype)), np.object_) + self.assertEqual( + resolve_dtype_iter((a1.dtype, a4.dtype, a6.dtype, a2.dtype)), np.object_ + ) + self.assertEqual( + resolve_dtype_iter((a1.dtype, a4.dtype, a6.dtype, a5.dtype)), np.object_ + ) # mixed strings go to the largest self.assertEqual(resolve_dtype_iter((a3.dtype, a5.dtype)).kind, 'U') @@ -139,7 +143,7 @@ def test_resolve_dtype_iter_a(self) -> None: with pytest.raises(ValueError): resolve_dtype_iter(()) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_shape_filter_a(self) -> None: @@ -154,13 +158,13 @@ def test_shape_filter_a(self) -> None: self.assertEqual(shape_filter(a2.reshape(2, 2)), (2, 2)) with self.assertRaises(NotImplementedError): - shape_filter(a1.reshape(1,2,5)) + shape_filter(a1.reshape(1, 2, 5)) with self.assertRaises(NotImplementedError): # zero dimension shape_filter(np.array(1)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_column_2d_filter_a(self) -> None: @@ -170,10 +174,9 @@ def test_column_2d_filter_a(self) -> None: self.assertEqual(column_2d_filter(a1.reshape(1, 10)).shape, (1, 10)) with self.assertRaises(NotImplementedError): - column_2d_filter(a1.reshape(1,2,5)) - + column_2d_filter(a1.reshape(1, 2, 5)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_column_1d_filter_a(self) -> None: @@ -185,9 +188,9 @@ def test_column_1d_filter_a(self) -> None: column_1d_filter(a1.reshape(2, 5)) with self.assertRaises(NotImplementedError): - column_1d_filter(a1.reshape(1,2,5)) + column_1d_filter(a1.reshape(1, 2, 5)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_row_1d_filter_a(self) -> None: @@ -199,9 +202,9 @@ def test_row_1d_filter_a(self) -> None: row_1d_filter(a1.reshape(2, 5)) with self.assertRaises(NotImplementedError): - row_1d_filter(a1.reshape(1,2,5)) + row_1d_filter(a1.reshape(1, 2, 5)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_array_deepcopy_a1(self) -> None: a1 = np.arange(10) @@ -224,7 +227,6 @@ def test_array_deepcopy_a2(self) -> None: self.assertEqual(memo[id(a1)].tolist(), a2.tolist()) self.assertFalse(a2.flags.writeable) - def test_array_deepcopy_b(self) -> None: a1 = np.arange(10) memo = {id(a1): a1} @@ -232,7 +234,6 @@ def test_array_deepcopy_b(self) -> None: self.assertEqual(mloc(a1), mloc(a2)) - def test_array_deepcopy_c1(self) -> None: mutable = [np.nan] memo = {} @@ -287,11 +288,167 @@ def test_array_deepcopy_h(self) -> None: with self.assertRaises(TypeError): a2 = array_deepcopy(a1, ()) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- + def test_write_array_to_file_a(self) -> None: + a1 = np.arange(12).reshape(3, 4) + fp = BytesIO() + write_array_to_file(a1, fp, buffersize=2) + + expected = b''.join( + chunk.tobytes('C') + for chunk in np.nditer( + a1, + flags=('external_loop', 'buffered', 'zerosize_ok'), + buffersize=2, + order='C', + ) + ) + self.assertEqual(fp.getvalue(), expected) + + def test_write_array_to_file_b(self) -> None: + a1 = np.arange(12).reshape(3, 4).T + fp = BytesIO() + write_array_to_file(a1, fp, fortran_order=True, buffersize=2) + + expected = b''.join( + chunk.tobytes('C') + for chunk in np.nditer( + a1, + flags=('external_loop', 'buffered', 'zerosize_ok'), + buffersize=2, + order='F', + ) + ) + self.assertEqual(fp.getvalue(), expected) + + def test_write_array_to_file_c(self) -> None: + with self.assertRaises(ValueError): + write_array_to_file(np.arange(4), BytesIO(), buffersize=0) + + def test_write_array_to_file_d(self) -> None: + a1 = np.arange(4, dtype=np.int64) + fp = BytesIO() + write_array_to_file(a1, fp) + self.assertEqual(fp.getvalue(), a1.tobytes()) + + def test_write_array_to_file_e(self) -> None: + a1 = np.arange(10, dtype=np.int16)[::2] + fp = BytesIO() + write_array_to_file(a1, fp, buffersize=3) + self.assertEqual(fp.getvalue(), a1.tobytes('C')) + + def test_write_array_to_file_f(self) -> None: + a1 = np.empty(5, dtype=np.dtype('V0')) + fp = BytesIO() + write_array_to_file(a1, fp) + self.assertEqual(fp.getvalue(), b'') + + # --------------------------------------------------------------------------- + # fd fast-path tests (real files expose fileno(), so these exercise the + # direct file-descriptor write path rather than the Python write() fallback) + + def test_write_array_to_file_fd_a(self) -> None: + # basic 1d C-contiguous array to a real (buffered) file + a1 = np.arange(20, dtype=np.int64) + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'a.bin') + with open(fp, 'wb') as f: + write_array_to_file(a1, f) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), a1.tobytes()) + + def test_write_array_to_file_fd_b(self) -> None: + # 2d array, C order, small buffersize forces multiple chunks + a1 = np.arange(12).reshape(3, 4) + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'b.bin') + with open(fp, 'wb') as f: + write_array_to_file(a1, f, buffersize=2) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), a1.tobytes('C')) + + def test_write_array_to_file_fd_c(self) -> None: + # 2d fortran-ordered array written in fortran order + a1 = np.arange(12).reshape(3, 4).T + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'c.bin') + with open(fp, 'wb') as f: + write_array_to_file(a1, f, fortran_order=True, buffersize=2) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), a1.tobytes('F')) + + def test_write_array_to_file_fd_d(self) -> None: + # non-contiguous array (strided view) exercises the packing path + a1 = np.arange(10, dtype=np.int16)[::2] + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'd.bin') + with open(fp, 'wb') as f: + write_array_to_file(a1, f, buffersize=3) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), a1.tobytes('C')) + + def test_write_array_to_file_fd_flush(self) -> None: + # regression: a Python-level buffered write before the fd write must be + # preserved. Without an internal flush, the array bytes would be written + # at the kernel offset ahead of the still-buffered header, corrupting + # the file. + a1 = np.arange(8, dtype=np.int64) + header = b'HEADER-DATA' + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'flush.bin') + with open(fp, 'wb') as f: + f.write(header) + write_array_to_file(a1, f) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), header + a1.tobytes()) + + def test_write_array_to_file_fd_interleave(self) -> None: + # multiple array writes interleaved with Python writes stay ordered + a1 = np.arange(4, dtype=np.int32) + a2 = np.arange(4, 8, dtype=np.int32) + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'inter.bin') + with open(fp, 'wb') as f: + f.write(b'<') + write_array_to_file(a1, f) + f.write(b'|') + write_array_to_file(a2, f) + f.write(b'>') + with open(fp, 'rb') as f: + self.assertEqual( + f.read(), b'<' + a1.tobytes() + b'|' + a2.tobytes() + b'>' + ) + + def test_write_array_to_file_fd_raw_fd(self) -> None: + # an os.open integer fd (no .flush) must still work + a1 = np.arange(6, dtype=np.float64) + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'raw.bin') + fd = os.open(fp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + try: + write_array_to_file(a1, fd) + finally: + os.close(fd) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), a1.tobytes()) + + def test_write_array_to_file_fd_empty(self) -> None: + # zero-size array to a real file produces an empty file + a1 = np.array([], dtype=np.int64) + with tempfile.TemporaryDirectory() as d: + fp = os.path.join(d, 'empty.bin') + with open(fp, 'wb') as f: + write_array_to_file(a1, f) + with open(fp, 'rb') as f: + self.assertEqual(f.read(), b'') + + # --------------------------------------------------------------------------- def test_array_to_tuple_array_1d_a(self) -> None: a1 = np.arange(10) a2 = array_to_tuple_array(a1) - self.assertEqual(a2.tolist(), [(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,)]) + self.assertEqual( + a2.tolist(), [(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,)] + ) def test_array_to_tuple_array_1d_b(self) -> None: a1 = np.array(['aaa', 'b', 'ccc']) @@ -305,9 +462,9 @@ def test_array_to_tuple_array_1d_c(self) -> None: def test_array_to_tuple_array_1d_d(self) -> None: a1 = np.array([('a', 10), ('b', 30), ('c', 5)], dtype=object) - a2 = array_to_tuple_array(a1) # from 2d + a2 = array_to_tuple_array(a1) # from 2d self.assertEqual(a2.tolist(), [('a', 10), ('b', 30), ('c', 5)]) - a3 = array_to_tuple_array(a2) # from 1d + a3 = array_to_tuple_array(a2) # from 1d self.assertEqual(a3.tolist(), [('a', 10), ('b', 30), ('c', 5)]) def test_array_to_tuple_array_1d_e(self) -> None: @@ -337,9 +494,8 @@ def test_array_to_tuple_array_b(self) -> None: self.assertFalse(result.flags.writeable) self.assertEqual(tuple(result), ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9))) - def test_array_to_tuple_array_c(self) -> None: - a1 = np.array([["a", "b"], ["ccc", "ddd"], ["ee", "ff"]]) + a1 = np.array([['a', 'b'], ['ccc', 'ddd'], ['ee', 'ff']]) a2 = array_to_tuple_array(a1) self.assertEqual(a2.tolist(), [('a', 'b'), ('ccc', 'ddd'), ('ee', 'ff')]) @@ -352,19 +508,49 @@ def test_array_to_tuple_array_d(self) -> None: def test_array_to_tuple_array_e(self) -> None: a1 = np.arange(20, dtype=np.int64).reshape(4, 5) result = array_to_tuple_array(a1) - self.assertEqual(result.tolist(), [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9), (10, 11, 12, 13, 14), (15, 16, 17, 18, 19)]) + self.assertEqual( + result.tolist(), + [ + (0, 1, 2, 3, 4), + (5, 6, 7, 8, 9), + (10, 11, 12, 13, 14), + (15, 16, 17, 18, 19), + ], + ) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_array_to_tuple_iter_a(self) -> None: a1 = np.arange(20, dtype=np.int64).reshape(4, 5) result = list(array_to_tuple_iter(a1)) self.assertEqual(len(result), 4) - self.assertEqual(result, [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9), (10, 11, 12, 13, 14), (15, 16, 17, 18, 19)]) + self.assertEqual( + result, + [ + (0, 1, 2, 3, 4), + (5, 6, 7, 8, 9), + (10, 11, 12, 13, 14), + (15, 16, 17, 18, 19), + ], + ) def test_array_to_tuple_iter_b(self) -> None: a1 = np.arange(20, dtype=np.int64).reshape(10, 2) result = list(array_to_tuple_iter(a1)) - self.assertEqual(result, [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9), (10, 11), (12, 13), (14, 15), (16, 17), (18, 19)]) + self.assertEqual( + result, + [ + (0, 1), + (2, 3), + (4, 5), + (6, 7), + (8, 9), + (10, 11), + (12, 13), + (14, 15), + (16, 17), + (18, 19), + ], + ) def test_array_to_tuple_iter_c(self) -> None: a1 = np.array([['aaa', 'bb'], ['c', 'dd'], ['ee', 'fffff']]) @@ -398,7 +584,7 @@ def test_array_to_tuple_iter_f(self) -> None: del a1 it2 = iter(it1) self.assertEqual(list(it1), [(None, 'bb'), (None, 'dd'), (3, None)]) - self.assertEqual(list(it2), []) # expected behavior + self.assertEqual(list(it2), []) # expected behavior def test_array_to_tuple_iter_g(self) -> None: a1 = np.array([[None, 'bb'], [None, 'dd'], [3, None]]) @@ -435,22 +621,24 @@ def test_array_to_tuple_iter_1d_d(self) -> None: a2 = list(array_to_tuple_iter(a1)) self.assertEqual(a2, [(3,), ('a', 30), (None, True, 90000000)]) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_isna_element_a(self) -> None: - class FloatSubclass(float): pass - class ComplexSubclass(complex): pass + class FloatSubclass(float): + pass + + class ComplexSubclass(complex): + pass self.assertTrue(isna_element(np.datetime64('NaT'))) self.assertTrue(isna_element(np.timedelta64('NaT'))) nan = np.nan complex_nans = [ - complex(nan, 0), - complex(-nan, 0), - complex(0, nan), - complex(0, -nan), + complex(nan, 0), + complex(-nan, 0), + complex(0, nan), + complex(0, -nan), ] float_classes = [float, np.float16, np.float32, np.float64, FloatSubclass] @@ -476,8 +664,20 @@ class ComplexSubclass(complex): pass def test_isna_element_b(self) -> None: # Test a wide range of float values, with different precision, across types for val in ( - 1e-1000, 1e-309, 1e-39, 1e-16, 1e-5, 0.1, 0., 1.0, 1e5, 1e16, 1e39, 1e309, 1e1000, - ): + 1e-1000, + 1e-309, + 1e-39, + 1e-16, + 1e-5, + 0.1, + 0.0, + 1.0, + 1e5, + 1e16, + 1e39, + 1e309, + 1e1000, + ): for sign in (1, -1): for ctor in (np.float16, np.float32, np.float64, float): self.assertFalse(isna_element(ctor(sign * val))) @@ -491,7 +691,6 @@ def test_isna_element_b(self) -> None: self.assertFalse(isna_element(datetime.date(2020, 12, 31))) self.assertFalse(isna_element(False)) - def test_isna_element_c(self) -> None: self.assertFalse(isna_element(None, include_none=False)) self.assertTrue(isna_element(None, include_none=True)) @@ -505,49 +704,48 @@ def test_isna_element_c(self) -> None: # s1 = pd.Series((0,)) # self.assertFalse(isna_element(s1)) - def test_isna_element_e(self) -> None: from types import SimpleNamespace + sn = SimpleNamespace() sn.to_numpy = None self.assertFalse(isna_element(sn)) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_dtype_from_element_core_dtypes(self) -> None: dtypes = [ - np.longlong, - np.int_, - np.intc, - np.short, - np.byte, - np.ubyte, - np.ushort, - np.uintc, - np.uint, - np.ulonglong, - np.half, - np.single, - np.float64, - np.longdouble, - np.csingle, - np.complex128, - np.clongdouble, - np.bool_, + np.longlong, + np.int_, + np.intc, + np.short, + np.byte, + np.ubyte, + np.ushort, + np.uintc, + np.uint, + np.ulonglong, + np.half, + np.single, + np.float64, + np.longdouble, + np.csingle, + np.complex128, + np.clongdouble, + np.bool_, ] for dtype in dtypes: self.assertEqual(dtype, dtype_from_element(dtype())) def test_dtype_from_element_str_and_misc_dtypes(self) -> None: dtype_obj_pairs = [ - (np.dtype(' None: NT = collections.namedtuple('NT', tuple('abc')) dtype_obj_pairs = [ - (np.int64, 12), - (np.float64, 12.0), - (np.bool_, True), - (np.dtype('O'), None), - (np.float64, float('NaN')), - (np.dtype('O'), object()), - (np.dtype('O'), (1, 2, 3)), - (np.dtype('O'), NT(1, 2, 3)), - (np.dtype('O'), datetime.date(2020, 12, 31)), - (np.dtype('O'), datetime.timedelta(14)), + (np.int64, 12), + (np.float64, 12.0), + (np.bool_, True), + (np.dtype('O'), None), + (np.float64, float('NaN')), + (np.dtype('O'), object()), + (np.dtype('O'), (1, 2, 3)), + (np.dtype('O'), NT(1, 2, 3)), + (np.dtype('O'), datetime.date(2020, 12, 31)), + (np.dtype('O'), datetime.timedelta(14)), ] for dtype, obj in dtype_obj_pairs: self.assertEqual(dtype, dtype_from_element(obj)) @@ -575,7 +773,9 @@ def test_dtype_from_element_time_dtypes(self) -> None: for precision in ['ns', 'us', 'ms', 's', 'm', 'h', 'D', 'M', 'Y']: for kind, ctor in (('m', np.timedelta64), ('M', np.datetime64)): obj = ctor(12, precision) - self.assertEqual(np.dtype(f'<{kind}8[{precision}]'), dtype_from_element(obj)) + self.assertEqual( + np.dtype(f'<{kind}8[{precision}]'), dtype_from_element(obj) + ) def test_dtype_from_element_str_and_bytes_dtypes(self) -> None: for size in (1, 8, 16, 32, 64, 128, 256, 512): @@ -586,7 +786,7 @@ def test_dtype_from_element_int(self) -> None: # make sure all platforms give 64 bit int self.assertEqual(str(dtype_from_element(3)), 'int64') - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_get_new_indexers_and_screen_a(self) -> None: indexersA = np.array([9, 9, 9, 9, 0, 0, 1, 4, 5, 0, 0, 0, 1], dtype=np.int64) @@ -600,21 +800,30 @@ def test_get_new_indexers_and_screen_a(self) -> None: # Prove we can handle non-continuous arrays indexersB = np.full((len(indexersA), 3), -1, dtype=np.int64) - indexersB[:,1] = indexersA.copy() - assert not indexersB[:,1].flags.c_contiguous - assert not indexersB[:,1].flags.f_contiguous - postB = get_new_indexers_and_screen_full(indexersB[:,1], np.arange(10, dtype=np.int64)) + indexersB[:, 1] = indexersA.copy() + assert not indexersB[:, 1].flags.c_contiguous + assert not indexersB[:, 1].flags.f_contiguous + postB = get_new_indexers_and_screen_full( + indexersB[:, 1], np.arange(10, dtype=np.int64) + ) assert tuple(map(list, postA)) == tuple(map(list, postB)) - indexersC = np.array([9, 9, 9, 9, 0, 0, 1, 4, 5, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=np.int64) - postC = get_new_indexers_and_screen_full(indexersC, positions=np.arange(15, dtype=np.int64)) + indexersC = np.array( + [9, 9, 9, 9, 0, 0, 1, 4, 5, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + dtype=np.int64, + ) + postC = get_new_indexers_and_screen_full( + indexersC, positions=np.arange(15, dtype=np.int64) + ) assert tuple(map(list, postC)) == ( [9, 0, 1, 4, 5, 2, 3, 6, 7, 8, 10], - [0, 0, 0, 0, 1, 1, 2, 3, 4, 1, 1, 1, 2, 5, 6, 3, 4,7, 8, 9, 0, 10], + [0, 0, 0, 0, 1, 1, 2, 3, 4, 1, 1, 1, 2, 5, 6, 3, 4, 7, 8, 9, 0, 10], ) indexersD = np.array([2, 1, 0, 2, 0, 1, 1, 2, 0], dtype=np.int64) - postD = get_new_indexers_and_screen_full(indexers=indexersD, positions=np.arange(3, dtype=np.int64)) + postD = get_new_indexers_and_screen_full( + indexers=indexersD, positions=np.arange(3, dtype=np.int64) + ) assert tuple(map(list, postD)) == ( [0, 1, 2], [2, 1, 0, 2, 0, 1, 1, 2, 0], @@ -639,7 +848,7 @@ def test_get_new_indexers_and_screen_b(self) -> None: postB = get_new_indexers_and_screen(indexersB, indexersB) assert tuple(map(list, postB)) == (list(indexersB), list(indexersB)) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_count_iteration_a(self) -> None: post = count_iteration(('a', 'b', 'c', 'd')) self.assertEqual(post, 4) @@ -649,7 +858,7 @@ def test_count_iteration_b(self) -> None: post = count_iteration(s1) self.assertEqual(post, 5) - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_first_true_1d_a(self) -> None: a1 = np.arange(100) == 50 post = first_true_1d(a1, forward=True) @@ -657,7 +866,11 @@ def test_first_true_1d_a(self) -> None: def test_first_true_1d_b(self) -> None: with self.assertRaises(TypeError): - a1 = [2, 4, 5,] + a1 = [ + 2, + 4, + 5, + ] first_true_1d(a1, forward=True) def test_first_true_1d_c(self) -> None: @@ -718,96 +931,62 @@ def test_first_true_1d_multi_b(self) -> None: self.assertEqual(first_true_1d(a1, forward=True), 10) self.assertEqual(first_true_1d(a1, forward=False), 50) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_first_true_2d_a(self) -> None: a1 = np.isin(np.arange(100), (9, 19, 38, 68, 96)).reshape(5, 20) post1 = first_true_2d(a1, axis=1, forward=True) # NOTE: this is an axis 1 result by argmax - self.assertEqual(post1.tolist(), - [9, 18, -1, 8, 16] - ) + self.assertEqual(post1.tolist(), [9, 18, -1, 8, 16]) post2 = first_true_2d(a1, axis=1, forward=False) - self.assertEqual(post2.tolist(), - [19, 18, -1, 8, 16] - ) + self.assertEqual(post2.tolist(), [19, 18, -1, 8, 16]) def test_first_true_2d_b(self) -> None: a1 = np.isin(np.arange(20), (3, 7, 10, 15, 18)).reshape(5, 4) post1 = first_true_2d(a1, axis=1, forward=False) - self.assertEqual(post1.tolist(), - [3, 3, 2, 3, 2] - ) + self.assertEqual(post1.tolist(), [3, 3, 2, 3, 2]) post2 = first_true_2d(a1, axis=1, forward=True) - self.assertEqual(post2.tolist(), - [3, 3, 2, 3, 2] - ) + self.assertEqual(post2.tolist(), [3, 3, 2, 3, 2]) post3 = first_true_2d(a1, axis=0, forward=False) - self.assertEqual(post3.tolist(), - [-1, -1, 4, 3] - ) + self.assertEqual(post3.tolist(), [-1, -1, 4, 3]) post4 = first_true_2d(a1, axis=0, forward=True) - self.assertEqual(post4.tolist(), - [-1, -1, 2, 0] - ) + self.assertEqual(post4.tolist(), [-1, -1, 2, 0]) def test_first_true_2d_c(self) -> None: a1 = np.isin(np.arange(20), ()).reshape(5, 4) post1 = first_true_2d(a1, axis=1, forward=False) - self.assertEqual(post1.tolist(), - [-1, -1, -1, -1, -1] - ) + self.assertEqual(post1.tolist(), [-1, -1, -1, -1, -1]) post2 = first_true_2d(a1, axis=1, forward=True) - self.assertEqual(post2.tolist(), - [-1, -1, -1, -1, -1] - ) + self.assertEqual(post2.tolist(), [-1, -1, -1, -1, -1]) post3 = first_true_2d(a1, axis=0, forward=False) - self.assertEqual(post3.tolist(), - [-1, -1, -1, -1] - ) + self.assertEqual(post3.tolist(), [-1, -1, -1, -1]) post4 = first_true_2d(a1, axis=0, forward=True) - self.assertEqual(post4.tolist(), - [-1, -1, -1, -1] - ) - + self.assertEqual(post4.tolist(), [-1, -1, -1, -1]) def test_first_true_2d_d(self) -> None: a1 = np.isin(np.arange(20), (0, 3, 4, 7, 8, 11, 12, 15, 16, 19)).reshape(5, 4) post1 = first_true_2d(a1, axis=1, forward=False) - self.assertEqual(post1.tolist(), - [3, 3, 3, 3, 3] - ) + self.assertEqual(post1.tolist(), [3, 3, 3, 3, 3]) post2 = first_true_2d(a1, axis=1, forward=True) - self.assertEqual(post2.tolist(), - [0, 0, 0, 0, 0] - ) + self.assertEqual(post2.tolist(), [0, 0, 0, 0, 0]) post3 = first_true_2d(a1, axis=0, forward=True) - self.assertEqual(post3.tolist(), - [0, -1, -1, 0] - ) + self.assertEqual(post3.tolist(), [0, -1, -1, 0]) post4 = first_true_2d(a1, axis=0, forward=False) - self.assertEqual(post4.tolist(), - [4, -1, -1, 4] - ) + self.assertEqual(post4.tolist(), [4, -1, -1, 4]) def test_first_true_2d_e(self) -> None: a1 = np.isin(np.arange(15), (2, 7, 12)).reshape(3, 5) post1 = first_true_2d(a1, axis=1, forward=False) - self.assertEqual(post1.tolist(), - [2, 2, 2] - ) + self.assertEqual(post1.tolist(), [2, 2, 2]) post2 = first_true_2d(a1, axis=1, forward=True) - self.assertEqual(post2.tolist(), - [2, 2, 2] - ) + self.assertEqual(post2.tolist(), [2, 2, 2]) def test_first_true_2d_f(self) -> None: a1 = np.isin(np.arange(15), (2, 7, 12)).reshape(3, 5) @@ -818,7 +997,6 @@ def test_first_true_2d_f(self) -> None: with self.assertRaises(ValueError): post1 = first_true_2d(a1, axis=2) - def test_first_true_2d_f(self) -> None: a1 = np.isin(np.arange(15), (1, 7, 14)).reshape(3, 5) post1 = first_true_2d(a1, axis=0, forward=True) @@ -827,44 +1005,37 @@ def test_first_true_2d_f(self) -> None: post2 = first_true_2d(a1, axis=0, forward=False) self.assertEqual(post2.tolist(), [-1, 0, 1, -1, 2]) - def test_first_true_2d_g(self) -> None: - a1 = np.isin(np.arange(15), (1, 7, 14)).reshape(3, 5).T # force fortran ordering - self.assertEqual(first_true_2d(a1, axis=0, forward=True).tolist(), - [1, 2, 4]) - self.assertEqual(first_true_2d(a1, axis=0, forward=False).tolist(), - [1, 2, 4]) - self.assertEqual(first_true_2d(a1, axis=1, forward=True).tolist(), - [-1, 0, 1, -1, 2]) - self.assertEqual(first_true_2d(a1, axis=1, forward=False).tolist(), - [-1, 0, 1, -1, 2]) - + a1 = np.isin(np.arange(15), (1, 7, 14)).reshape(3, 5).T # force fortran ordering + self.assertEqual(first_true_2d(a1, axis=0, forward=True).tolist(), [1, 2, 4]) + self.assertEqual(first_true_2d(a1, axis=0, forward=False).tolist(), [1, 2, 4]) + self.assertEqual( + first_true_2d(a1, axis=1, forward=True).tolist(), [-1, 0, 1, -1, 2] + ) + self.assertEqual( + first_true_2d(a1, axis=1, forward=False).tolist(), [-1, 0, 1, -1, 2] + ) def test_first_true_2d_h(self) -> None: # force fortran ordering, non-contiguous, non-owned a1 = np.isin(np.arange(15), (1, 4, 5, 7, 8, 12, 15)).reshape(3, 5).T[:4] - self.assertEqual(first_true_2d(a1, axis=0, forward=True).tolist(), - [1, 0, 2]) - self.assertEqual(first_true_2d(a1, axis=0, forward=False).tolist(), - [1, 3, 2]) - self.assertEqual(first_true_2d(a1, axis=1, forward=True).tolist(), - [1, 0, 1, 1]) - self.assertEqual(first_true_2d(a1, axis=1, forward=False).tolist(), - [1, 0, 2, 1]) + self.assertEqual(first_true_2d(a1, axis=0, forward=True).tolist(), [1, 0, 2]) + self.assertEqual(first_true_2d(a1, axis=0, forward=False).tolist(), [1, 3, 2]) + self.assertEqual(first_true_2d(a1, axis=1, forward=True).tolist(), [1, 0, 1, 1]) + self.assertEqual(first_true_2d(a1, axis=1, forward=False).tolist(), [1, 0, 2, 1]) - - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- def test_slice_to_ascending_slice_a(self) -> None: - self.assertEqual(slice_to_ascending_slice( - slice(5, 2, -1), 6), - slice(3, 6, None), - ) + self.assertEqual( + slice_to_ascending_slice(slice(5, 2, -1), 6), + slice(3, 6, None), + ) def test_slice_to_ascending_slice_b(self) -> None: - self.assertEqual(slice_to_ascending_slice( - slice(2, 5, 1), 6), - slice(2, 5, 1), - ) + self.assertEqual( + slice_to_ascending_slice(slice(2, 5, 1), 6), + slice(2, 5, 1), + ) def test_slice_to_ascending_slice_c(self) -> None: with self.assertRaises(TypeError): @@ -874,24 +1045,24 @@ def test_slice_to_ascending_slice_c(self) -> None: _ = slice_to_ascending_slice(slice(1, 4), 'x') def test_slice_to_ascending_slice_d(self) -> None: - self.assertEqual(slice_to_ascending_slice( - slice(10, 2, -2), 12), - slice(4, 11, 2), - ) + self.assertEqual( + slice_to_ascending_slice(slice(10, 2, -2), 12), + slice(4, 11, 2), + ) def test_slice_to_ascending_slice_e(self) -> None: for slc, size in ( - (slice(10, 2, -2), 12), - (slice(12, 2, -3), 12), - (slice(12, None, -4), 12), - (slice(76, 12, -8), 100), - (slice(81, 33, -12), 100), - (slice(97, 6, -7), 101), - ): + (slice(10, 2, -2), 12), + (slice(12, 2, -3), 12), + (slice(12, None, -4), 12), + (slice(76, 12, -8), 100), + (slice(81, 33, -12), 100), + (slice(97, 6, -7), 101), + ): self.assertEqual( slice_to_ascending_slice(slc, size), slice_to_ascending_slice_ref(slc, size), - ) + ) def test_slice_to_ascending_slice_f(self) -> None: @@ -901,7 +1072,11 @@ def compare(slc: slice) -> None: slc_asc = slice_to_ascending_slice(slc, len(a1)) self.assertEqual(sorted(a1[slc]), list(a1[slc_asc])) - compare(slice(4,)) + compare( + slice( + 4, + ) + ) compare(slice(6, 1, -1)) compare(slice(6, 1, -2)) compare(slice(6, None, -3)) @@ -910,49 +1085,38 @@ def compare(slc: slice) -> None: def test_slice_to_ascending_slice_g(self) -> None: self.assertEqual( - slice_to_ascending_slice(slice(3, None, -1), 10), - slice(0, 4, None) - ) - self.assertEqual( - slice_to_ascending_slice(slice(3, None, -3), 10), - slice(0, 4, 3) - ) - self.assertEqual( - slice_to_ascending_slice(slice(-3, 0, -1), 10), - slice(1, 8, None) - ) + slice_to_ascending_slice(slice(3, None, -1), 10), slice(0, 4, None) + ) + self.assertEqual(slice_to_ascending_slice(slice(3, None, -3), 10), slice(0, 4, 3)) self.assertEqual( - slice_to_ascending_slice(slice(-3, None, -1), 10), - slice(0, 8, None) - ) + slice_to_ascending_slice(slice(-3, 0, -1), 10), slice(1, 8, None) + ) self.assertEqual( - slice_to_ascending_slice(slice(-3, 0, -2), 10), - slice(1, 8, 2) - ) + slice_to_ascending_slice(slice(-3, None, -1), 10), slice(0, 8, None) + ) + self.assertEqual(slice_to_ascending_slice(slice(-3, 0, -2), 10), slice(1, 8, 2)) self.assertEqual( - slice_to_ascending_slice(slice(-3, None, -2), 10), - slice(1, 8, 2) - ) + slice_to_ascending_slice(slice(-3, None, -2), 10), slice(1, 8, 2) + ) self.assertEqual( - slice_to_ascending_slice(slice(-3, None, -6), 10), - slice(1, 8, 6) - ) + slice_to_ascending_slice(slice(-3, None, -6), 10), slice(1, 8, 6) + ) def test_slice_to_ascending_slice_h(self) -> None: self.assertEqual( slice_to_ascending_slice(slice(-9, -1, 1), 10), - slice(-9, -1, 1) # ascenidng - ) + slice(-9, -1, 1), # ascenidng + ) self.assertEqual( slice_to_ascending_slice(slice(-9, -1, -1), 10), - slice(2, 2, None) # ascending start stop, descending - ) + slice(2, 2, None), # ascending start stop, descending + ) def test_slice_to_ascending_slice_i(self) -> None: self.assertEqual( - slice_to_ascending_slice(slice(1, -10, -1), 10), # [1] - slice(1, 2, None) - ) + slice_to_ascending_slice(slice(1, -10, -1), 10), # [1] + slice(1, 2, None), + ) def test_slice_to_unit_a(self) -> None: self.assertEqual(slice_to_unit(slice(3, 4)), 3)