import numpy as np
import struct
from operator import mul
class BinFile(object):
    """
    Handles the low-level reading and writing of binary files. Inherit from this class for all your binary file I/O
    needs.

    The class keeps its own logical file position (``_file_pos``). Every read, write and end-of-file test is
    performed at that position, so a :py:meth:`_peek()` never disturbs subsequent operations.

    Instances can be used as context managers; the file is closed on exit.

    Args:
        file_name (str): Name of the file to open.
        mode (str): Read/write mode for the file. Default is 'r' (for reading). The file is always opened in
            binary mode; a 'b' is appended if `mode` does not already contain one.
        byteorder (str): Endianness for the file. Acceptable values are '<' for little-endian, '>' for big endian.
            Default is '<' (little endian).
    """

    ANCH_FILEBEG = 0
    ANCH_CURPOS = 1
    ANCH_FILEEND = 2

    def __init__(self, file_name, mode='r', byteorder="<"):
        """
        Constructor for the BinFile class
        """
        # Always open in binary mode, but do not double the 'b' if the caller already supplied it.
        open_mode = mode if 'b' in mode else "%sb" % mode
        self._bin_file = open(file_name, open_mode)
        self._byteorder = byteorder
        self._mode = mode
        self._file_pos = self._bin_file.tell()
        self._read_target = False

    def __enter__(self):
        """
        Enter a ``with`` block; returns the object itself.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Leave a ``with`` block, closing the file. Exceptions are not suppressed.
        """
        self.close()
        return False

    def close(self):
        """
        Close the file
        """
        self._bin_file.close()
        self._file_pos = -1

    def _read(self, type_string, _peeking=False):
        """
        Read values from the file and return as a specific type.

        Args:
            type_string (str): The number and type of data to read from the file. The form of the string is `nt`,
                where `n` is the number of values to read, and `t` is the type of the values. If `n` is omitted, it
                is assumed to be 1. Acceptable values for `t` can be found in the table below.
            _peeking (bool): If True, the logical file position is not advanced. Used by :py:meth:`_peek()`.

        Returns:
            Data from the file as the specified type. For example, a type string of '4f' will return a list of 4
            floats. A type string of 'i' will return a single integer. A single string value (type character `s`)
            is returned with NUL padding and surrounding whitespace stripped. If there is no data left at the
            current position, an empty string is returned.

        Raises:
            struct.error: If fewer bytes than requested remain in the file.

        Some possible values for the type character are as follows:

        +----------------+------------------------+
        | Type Character | Meaning                |
        +================+========================+
        | i              | signed 32-bit integer  |
        +----------------+------------------------+
        | h              | signed 16-bit integer  |
        +----------------+------------------------+
        | b              | signed 8-bit integer   |
        +----------------+------------------------+
        | f              | single-precision float |
        +----------------+------------------------+
        | d              | double-precision float |
        +----------------+------------------------+
        | c              | single character       |
        +----------------+------------------------+
        | s              | character string       |
        +----------------+------------------------+

        For the `s` character, the leading number is the length of the string rather than a repeat count. For
        example, `10s` refers to a single string of length 10. See
        https://docs.python.org/2/library/struct.html#format-characters for a full description.
        """
        # The byte-order prefix selects struct's "standard" sizes with no alignment padding. The same format must
        # be used to size the read and to unpack it, otherwise the byte counts can disagree.
        fmt = "%s%s" % (self._byteorder, type_string)

        self._bin_file.seek(self._file_pos)
        raw = self._bin_file.read(struct.calcsize(fmt))
        if not raw:
            # End of file: keep the historical empty-string sentinel.
            return ""

        data = struct.unpack(fmt, raw)
        if not _peeking:
            self._file_pos = self._bin_file.tell()

        if len(data) != 1:
            return list(data)
        if type_string[-1] == 's':
            # Fixed-width strings are usually NUL- or space-padded; return only the payload.
            return data[0].strip(b"\0").strip()
        return data[0]

    def _write(self, value, type_string):
        """
        Write values to the file at the current logical position.

        Args:
            value: A single value, or a list/tuple of values, matching `type_string`.
            type_string (str): See :py:meth:`_read()` for a full description.
        """
        if not isinstance(value, (list, tuple)):
            value = [value]

        # Write where _tell() says we are (a previous _peek() leaves the underlying file pointer advanced), and
        # keep the logical position in step with what has been written.
        self._bin_file.seek(self._file_pos)
        self._bin_file.write(struct.pack("%s%s" % (self._byteorder, type_string), *value))
        self._file_pos = self._bin_file.tell()

    def _read_grid(self, type_string, shape, fortran=False):
        """
        Read a grid from the file.

        Args:
            type_string (str): As in :py:meth:`_read()`, but only the type character. The number is determined by
                the `shape` argument.
            shape (tuple): The shape of the grid to return.
            fortran (bool): Whether or not to read the grid as a Fortran-formatted block, i.e. preceded and
                followed by a 4-byte record marker. Default is False.

        Returns:
            A grid of data as a numpy array. Values are taken from the file in column-major (Fortran) order.
        """
        if fortran:
            # Leading record marker; its value is not checked.
            self._read('i')

        length = int(np.prod(shape))
        values = self._read("%d%s" % (length, type_string))
        grid = np.array(values).reshape(shape, order='F')

        if fortran:
            # Trailing record marker.
            self._read('i')
        return grid

    def _write_grid(self, type_string, grid, fortran=False):
        """
        Write a grid to the file.

        Args:
            type_string (str): As in :py:meth:`_read()`, but only the type character. The number is determined by
                the shape of `grid`.
            grid (numpy.ndarray): The grid to write. Values are written in column-major (Fortran) order.
            fortran (bool): Whether or not to write the grid as a Fortran-formatted block, i.e. preceded and
                followed by a 4-byte record marker. Default is False.
        """
        count = int(np.prod(grid.shape))
        fmt = "%d%s" % (count, type_string)
        # Record markers hold the payload length in bytes (as in _write_block()), not the number of elements.
        block_size = struct.calcsize("%s%s" % (self._byteorder, fmt))

        if fortran:
            self._write(block_size, 'i')
        # tolist() yields native Python scalars, which struct.pack() accepts for every type character.
        self._write(np.ravel(grid, order='F').tolist(), fmt)
        if fortran:
            self._write(block_size, 'i')

    def _read_block(self, type_dict, dest_dict, fortran=False):
        """
        Read a block of data from the file.

        Args:
            type_dict (OrderedDict): An ordered dictionary with variable names as keys and type strings (see
                :py:meth:`_read()`) as values.
            dest_dict (dict): A dictionary in which to place the values when they've been read from the file.
                Values read for the key '__dummy__' are discarded.
            fortran (bool): Whether or not to read the block as a Fortran-formatted block. Default is False.

        Raises:
            IOError: If `fortran` is True and the leading record marker does not match the size of the block
                described by `type_dict`.
        """
        if fortran:
            block_size = self._read('i')
            if block_size != self._compute_block_size(type_dict):
                raise IOError("readBlock(): Got an unexpected block.")

        for key, type_str in type_dict.items():
            data = self._read(type_str)
            if key != '__dummy__':
                dest_dict[key] = data

        if fortran:
            # Trailing record marker.
            self._read('i')

    def _write_block(self, type_dict, src_dict, fortran=False):
        """
        Write a block of data to the file.

        Args:
            type_dict (OrderedDict): An ordered dictionary with variable names as keys and type strings (see
                :py:meth:`_read()`) as values.
            src_dict (dict): A dictionary containing the variable names as keys and their values.
            fortran (bool): Whether or not to write the block as a Fortran-formatted block. Default is False.
        """
        block_size = self._compute_block_size(type_dict)

        if fortran:
            self._write(block_size, 'i')
        for key, type_str in type_dict.items():
            self._write(src_dict[key], type_str)
        if fortran:
            self._write(block_size, 'i')

    def _compute_block_size(self, type_dict):
        """
        Compute the size in bytes of the block described by a type dictionary.

        Args:
            type_dict (OrderedDict): Variable names as keys and type strings (see :py:meth:`_read()`) as values.

        Returns:
            The number of bytes occupied by all the values in the block.
        """
        # Use the file's byte order so the size is computed without native alignment padding, matching the
        # number of bytes the individual _read()/_write() calls actually transfer.
        return struct.calcsize(self._byteorder + "".join(type_dict.values()))

    def _peek(self, type_string):
        """
        Peek at a value in the file.

        Args:
            type_string (str): See :py:meth:`_read()`.

        Returns:
            Data from the file (see :py:meth:`_read()`).
        """
        return self._read(type_string, _peeking=True)

    def _seek(self, location, anchor=ANCH_FILEBEG):
        """
        Move the file pointer to a particular location.

        Args:
            location (int): Location in the file in number of bytes.
            anchor (int): The point in the file to which *location* is relative. For example,
                `BinFile.ANCH_FILEBEG` means that *location* is relative to the beginning of the file. Default
                value is `BinFile.ANCH_FILEBEG`.
        """
        self._bin_file.seek(location, anchor)
        self._file_pos = self._bin_file.tell()

    def _tell(self):
        """
        Get the current location of the file pointer in bytes from the start of the file.
        """
        return self._file_pos

    def _ateof(self):
        """
        Returns True if the pointer has reached the end of the file, returns False otherwise.
        """
        # Test at the logical position; the underlying file pointer may be further along after a _peek().
        self._bin_file.seek(self._file_pos)
        return not self._bin_file.read(1)
if __name__ == "__main__":
    # Smoke test against a sample data file: the first 4-byte integer is expected to be 28, and peeking at it
    # must not consume it.
    bf = BinFile("qc/manual/1km/KCYS.20090605.215744")
    try:
        assert bf._peek('i') == 28
        assert bf._read('i') == 28
    finally:
        # Release the file handle even if an assertion fails.
        bf.close()
    # Parenthesized form works as a statement on Python 2 and as a function call on Python 3.
    print("Unit tests done.")