Skip to content

Commit

Permalink
Data.translate_to_txt (#1074)
Browse files Browse the repository at this point in the history
* working example

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update _data.py

* cleanup

* Update _data.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* cleanup

* Update _data.py

* attrs

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* verbose kwarg

* test

not meant to run in pytests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update translate_to_txt.py

* Update translate_to_txt.py

* write in chunks

* ksunden comments

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update translate_to_txt.py

ensure tempfile cleanup
minimal check for correct output

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update CHANGELOG.md

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ddkohler and pre-commit-ci[bot] committed Jul 6, 2023
1 parent f23f1e2 commit 9a299f0
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/).
- `Axis`: space characters ("\s") in expressions are culled.
- fixed `interact2D` bug: channel/axes can now be specified with non-zero index arguments

### Added
- `Data.translate_to_txt`: serialize channels and variables and write as a text file.

## [3.4.6]

### Fixed
Expand Down
89 changes: 89 additions & 0 deletions WrightTools/data/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import warnings

import numpy as np
import sys

import h5py

Expand Down Expand Up @@ -1928,6 +1929,94 @@ def transform(self, *axes, verbose=True):
elif verbose and newt and not nownewt:
print("I got better")

def translate_to_txt(
    self, filepath, delimiter="\t", channels=None, variables=None, fmt=".5g", verbose=True
):
    """Write a serialized, readable list of the channels and variables to file.

    Each line (row) denotes a separate data point. Each column
    represents a unique variable or channel. Axes are neglected.

    Parameters
    ----------
    filepath: path-like
        path to output file.
    delimiter: str
        separation character. Defaults to "\\t"
    channels: iterable of str (optional)
        Channel names to include. Default is all channels.
    variables: iterable of str (optional)
        Variable names to include. Default is all variables.
    fmt: [python format spec](https://docs.python.org/3/library/string.html#formatspec)
        format specifier for variables and channels
    verbose: bool (optional)
        Toggle the progress bar written to stdout. Default is True.

    Returns
    -------
    None

    Notes
    -----
    * This is a lossy write procedure; some properties, such as axes, are
      not recorded.
    * The shape structure of the data is recorded as a series of indices
      (`{a_i}`) comprising the first few columns. A vertical line separates
      these indexes from variables and channels
    * wt5, the native file format for Data objects, is a specific variant
      of the HDF5 file format. HDF5 has well-developed tools for working with
      generic datasets. See
      https://portal.hdfgroup.org/display/knowledge/How+to+convert+an+HDF4+or+HDF5+file+to+ASCII+%28text%29+or+Excel
      for more information.
    """
    # attrs are written first; the table below is appended to the same file
    import tidy_headers

    tidy_headers.write(filepath, dict(self.attrs.items()))

    # normalize to lists so any iterable (tuple, generator, ...) is accepted
    # and the two selections can be concatenated safely
    variables = list(self.variable_names) if variables is None else list(variables)
    channels = list(self.channel_names) if channels is None else list(channels)
    names = variables + channels
    # resolve each array once, rather than once per data point
    arrays = [self[name] for name in names]

    columns = [f"a_{i}" for i in range(self.ndim)]
    columns.append("|")
    columns.extend(f"{name} ({arr.units})" for name, arr in zip(names, arrays))
    # per array: which dimensions have length 1 and must always be indexed at 0
    is_broadcast = [[length == 1 for length in arr.shape] for arr in arrays]

    size = self.size
    with open(filepath, "a") as f:
        f.write(delimiter.join(columns) + "\n")
        pending = []  # rows buffered since the last write to disk
        for i, ndi in enumerate(np.ndindex(self.shape)):
            line = [str(idx) for idx in ndi]
            line.append("|")
            for arr, squeezed in zip(arrays, is_broadcast):
                # broadcast reduced dimensions arrays to full
                idxs = tuple(0 if sq else xi for xi, sq in zip(ndi, squeezed))
                line.append(f"{arr[idxs]:{fmt}}")
            if verbose and not i % 10:
                frac = round(i / size, 3)
                sys.stdout.write(
                    f"[{'=' * int(frac * 60): <60}] {frac * 100:0.1f}% ...to_txt\r"
                )
                sys.stdout.flush()
            pending.append(delimiter.join(line) + "\n")
            if (i % 100) == 99:  # write to disk periodically
                f.writelines(pending)
                pending.clear()
        if pending:
            f.writelines(pending)

    if verbose:
        sys.stdout.write(f"[{'=' * 60}] {100:0.1f}% ...done! \r")
        sys.stdout.flush()

def set_constants(self, *constants, verbose=True):
"""Set the constants associated with the data.
Expand Down
41 changes: 41 additions & 0 deletions tests/data/translate_to_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Test translate_to_txt."""


# --- import --------------------------------------------------------------------------------------


import pathlib
import WrightTools as wt

from WrightTools import datasets
from tempfile import NamedTemporaryFile


# --- tests ---------------------------------------------------------------------------------------


def test_datasets_mos2():
    """Write a small MoS2 dataset to text and compare one row against the source data.

    The temporary output file is removed and the data object is closed even
    if the comparison fails.
    """
    d = wt.open(datasets.wt5.v1p0p1_MoS2_TrEE_movie).at(w2=[18000, "wn"])[:5]

    # Only reserve a file name here; the handle is closed before writing so
    # that the file can be reopened (and later deleted) on every platform.
    with NamedTemporaryFile(delete=False) as tmp:
        path = pathlib.Path(tmp.name)

    try:
        d.translate_to_txt(tmp.name, verbose=True)

        with open(tmp.name, "r") as f:
            # skip the first 100 lines, then sample the next one
            for _ in range(100):
                f.readline()
            datum_txt = f.readline().split("|")

        # left of "|" are the array indices, right of it the written values
        id1, id2 = map(int, datum_txt[0].split())
        values = [float(v) for v in datum_txt[1].split()]
        datum_wt5 = d[id1, id2]
        for i, vari in enumerate(d.variable_names):
            # print(vari, values[i], datum_wt5[vari][:])
            assert (values[i] - datum_wt5[vari][:]) ** 2 <= (1e-4 * values[i]) ** 2
    finally:
        d.close()
        path.unlink()


# --- run -----------------------------------------------------------------------------------------


if __name__ == "__main__":
    # Allow running this check directly as a script.
    test_datasets_mos2()

0 comments on commit 9a299f0

Please sign in to comment.