Skip to content

files

Working with file inputs and outputs

BadArchiveError

Bases: OSError

The archive contains a bad file

Source code in bag3d/common/utils/files.py
14
15
16
17
class BadArchiveError(OSError):
    """Signals that an archive contains at least one bad file."""

bag3d_dir(root_dir)

The 3D BAG data directory

Source code in bag3d/common/utils/files.py
43
44
45
def bag3d_dir(root_dir: os.PathLike) -> Path:
    """Return the path of the 3D BAG data directory below *root_dir*.

    Only the path is built; nothing is created on the file system.
    """
    data_root = Path(root_dir)
    return data_root.joinpath("3DBAG")

bag3d_export_dir(root_dir)

Create the 3DBAG export directory if it does not exist

Source code in bag3d/common/utils/files.py
53
54
55
56
57
def bag3d_export_dir(root_dir: os.PathLike) -> Path:
    """Return the 3DBAG export directory, creating it if it does not exist.

    Args:
        root_dir: Directory that contains the ``3DBAG`` data directory.

    Returns:
        The path ``<root_dir>/3DBAG/export``.
    """
    export_dir = bag3d_dir(root_dir) / "export"
    # Create missing parents too, so the call also works when the 3DBAG data
    # directory itself has not been created yet.
    export_dir.mkdir(parents=True, exist_ok=True)
    return export_dir

check_export_results(path_quadtree_tsv, path_tiles_dir)

Parse the quadtree.tsv written by tyler, check if all formats exist for each tile, add the tile WKT.

Returns:

Type Description
Iterator[ExportResult]

Generator of ExportResult

Source code in bag3d/common/utils/files.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def check_export_results(
    path_quadtree_tsv: Path, path_tiles_dir: Path
) -> Iterator[ExportResult]:
    """Parse the `quadtree.tsv` written by *tyler* and yield one result per
    exported leaf tile, including the tile WKT.

    Only rows with ``leaf == "true"`` and ``nr_items > 0`` whose tile directory
    exists at ``path_tiles_dir/<tile id>`` are reported. The CityJSON and
    GeoPackage paths are derived from the tile ID and are not tested for
    existence in this function; the OBJ paths are the ``.obj`` files found in the
    tile directory.

    Args:
        path_quadtree_tsv: Path to the tab-separated `quadtree.tsv` file. The
            columns ``id``, ``leaf``, ``nr_items`` and ``wkt`` are read.
        path_tiles_dir: Directory below which the tile directories are located.

    Returns:
         Generator of ExportResult
    """
    # The csv module expects files opened with newline="" so that it can handle
    # line endings (also inside quoted fields) itself.
    with path_quadtree_tsv.open("r", newline="") as fo:
        for row in csv.DictReader(fo, delimiter="\t"):
            # Skip inner nodes and empty leaves.
            if row["leaf"] != "true" or int(row["nr_items"]) <= 0:
                continue
            leaf_id = row["id"]
            leaf_dir = path_tiles_dir.joinpath(leaf_id)
            if not leaf_dir.exists():
                continue
            # The tile ID contains "/" separators, which are replaced by "-" in
            # the file names.
            basename = leaf_dir / leaf_id.replace("/", "-")
            yield ExportResult(
                tile_id=leaf_id,
                cityjson_path=basename.with_suffix(".city.json"),
                gpkg_path=basename.with_suffix(".gpkg"),
                obj_paths=tuple(
                    p for p in leaf_dir.iterdir() if p.suffix == ".obj"
                ),
                wkt=row["wkt"],
            )

geoflow_crop_dir(root_dir)

Directory for the Geoflow crop-reconstruct output

Source code in bag3d/common/utils/files.py
48
49
50
def geoflow_crop_dir(root_dir: os.PathLike) -> Path:
    """Return the path of the directory for the Geoflow crop-reconstruct output.

    The directory is located in the 3D BAG data directory of *root_dir*. Only the
    path is built; nothing is created on the file system.
    """
    return bag3d_dir(root_dir).joinpath("crop_reconstruct")

get_export_tile_ids()

Get the IDs of the distribution tiles from the file system. It reads the quadtree.tsv output from tyler and extracts the IDs of the leaf tiles.

Fixme
  • Currently we read the export directory from the BAG3D_EXPORT_DIR environment variable, or use /data/3DBAG/export as the default. Maybe we could consolidate all resource configurations into .env files and load them from there in all places. However, we need to be able to load different .env files for the production and dev setups.

Returns:

Type Description
Sequence[str]

List of tile IDs

Source code in bag3d/common/utils/files.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_export_tile_ids() -> Sequence[str]:
    """Get the IDs of the distribution tiles from the file system.
    It reads the `quadtree.tsv` output from *tyler* and extracts the IDs of the
    leaf tiles.

    Fixme:
        * Currently we read the export directory from the BAG3D_EXPORT_DIR
        environment variable, or use /data/3DBAG/export as default. Maybe we could
        consolidate all resource configurations to .env files and load from there
        in all places. But we need to be able to load different .env files for the
        production and dev setup.

    Returns:
        List of tile IDs. The list is empty if the export directory does not
        contain a `quadtree.tsv`.

    Raises:
        FileNotFoundError: The export directory does not exist.
    """
    export_dir = Path(os.environ.get("BAG3D_EXPORT_DIR", "/data/3DBAG/export"))
    if not export_dir.exists():
        # Single-line message: a triple-quoted f-string would embed the source
        # indentation and a line break in the error text.
        raise FileNotFoundError(
            f"Export directory {export_dir} does not exist. "
            "You need to set the BAG3D_EXPORT_DIR variable."
        )

    path_quadtree_tsv = export_dir.joinpath("quadtree.tsv")
    if not path_quadtree_tsv.exists():
        # Without the quadtree file there are no tiles to report.
        return []

    path_tiles_dir = export_dir.joinpath("tiles")
    return [
        er.tile_id for er in check_export_results(path_quadtree_tsv, path_tiles_dir)
    ]

unzip(file, dest)

Uncompress the whole zip archive and delete the zip.

Parameters:

Name Type Description Default
file Path

The Path to the zip.

required
dest Path

The Path to the destination directory.

required

Raises:

Type Description
BadArchiveError

The archive contains at least one bad file

Source code in bag3d/common/utils/files.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def unzip(file: Path, dest: Path) -> None:
    """Extract every member of a zip archive and remove the archive afterwards.

    The archive is tested before extraction. If a bad file is found, nothing is
    extracted and the zip is not deleted.

    Args:
        file: The Path to the zip.
        dest: The Path to the destination directory.

    Raises:
        BadArchiveError: The archive contains at least one bad file
    """
    log = get_dagster_logger()
    log.info(f"Uncompressing {file} to {dest}")
    with ZipFile(file, "r") as archive:
        # testzip() returns the name of the first bad file, or None.
        corrupt_member = archive.testzip()
        if corrupt_member:
            message = f"The archive contains at least one bad file: {corrupt_member}"
            raise BadArchiveError(message)
        archive.extractall(path=dest)
    log.info(f"Deleting {file}")
    file.unlink()