Source code for pyprecag.convert

# -*- coding: utf-8 -*-
"""
Format conversion routines.
"""
from  datetime import timedelta
import inspect
import logging
import math
import os
import time

import numpy as np

import pandas as pd
from pandas.core.dtypes.common import is_string_dtype

from osgeo import ogr, gdal
import geopandas
from geopandas import GeoDataFrame, GeoSeries

import rasterio
from rasterio import features

from fiona import collection as fionacoll
from fiona.crs import from_epsg
from shapely.geometry import Point, mapping, shape, LineString

from . import crs as pyprecag_crs
from . import TEMPDIR, config
from .describe import CsvDescribe, predictCoordinateColumnNames, VectorDescribe, \
    save_geopandas_tofile

from .errors import GeometryError
from .raster_ops import raster_snap_extent, create_raster_transform

LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())


[docs]def numeric_pixelsize_to_string(pixelsize_metres):
    """
    Convert a numeric pixel size to a string with the appropriate units for use in file names.
    For example 0.40 is 40cm,  0.125 is 125mm, 1.5km is 1500m,  2.0 is 2m, 6000 is 6km
    Args:
        pixelsize_metres (numeric): The pixel size in meters.

    Returns (str): a string representation of the pixel size
    """
    if isinstance(pixelsize_metres, str):
        raise ValueError('Pixel size should be numeric (int, float etc)')

    if pixelsize_metres % 1000.0 == 0:  # or km
        result = '{}km'.format(int(pixelsize_metres / 1000))
    elif pixelsize_metres % 1 == 0:  # is in m
        result = '{}m'.format(int(pixelsize_metres))
    elif (pixelsize_metres * 100.0) % 1 == 0:  # is in cm
        result = '{}cm'.format(int(pixelsize_metres * 100))
    else:  # or mm
        result = '{}mm'.format(int(pixelsize_metres * 1000))
    return result


[docs]def point_to_point_bearing(pt1, pt2):
    """Calculate bearing from north between two points """
    # variation of  https://community.esri.com/thread/27393

    if isinstance(pt1, tuple):
        pt1 = Point(pt1)

    if isinstance(pt2, tuple):
        pt2 = Point(pt2)

    bearing = 180 + math.atan2((pt2.x - pt1.x), (pt2.y - pt1.y)) * (180 / math.pi)
    return bearing


[docs]def line_bearing(line):
    """ The bearing on the line between the start and end nodes/points"""
    if not isinstance(line, LineString):
        line = line.geometry
    return point_to_point_bearing(line.coords[-1], line.coords[0])


[docs]def deg_to_16_compass_pts(degrees):
    """
    Convert a compass bearing from north using 16 cardinal directions
    Args:
        degrees (float): Degrees from north
    Returns:
        direction(str) : letters representing the compass point
    source: https://stackoverflow.com/a/7490772
    """

    # fix if a negative number is used ie -45 turns into 315
    if degrees < 0:
        degrees = 360.0 + degrees
    val = int((degrees / 22.5) + .5)

    arr = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW",
           "NW", "NNW"]
    return arr[(val % 16)]


[docs]def deg_to_8_compass_pts(degrees):
    """
    Convert a compass bearing from north using 8 cardinal directions
    Args:
        degrees (float): Degrees from north
    Returns:
        direction(str) : letters representing the compass point

    source: variation of https://stackoverflow.com/a/7490772
    """
    # fix if a  negative number is used
    if degrees < 0:
        degrees = 360.0 + degrees

    val = int((degrees / 45.0) + .5)

    arr = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]

    return arr[(val % 8)]


[docs]def drop_z(geom):
    if isinstance(geom, LineString):
        return LineString([xy[0:2] for xy in list(geom.coords)])
    if isinstance(geom, Point):
        return Point([geom.coords[0:2]])


[docs]def text_rotation(pt1, pt2):
    # calculate an angle to use for labelling in matplotlib plots
    # https://stackoverflow.com/a/35825527
    rotation = np.rad2deg(np.arctan2(pt2.y - pt1.y, pt2.x - pt1.x))
    # need to adjust slighlt to avoid upside down labels
    if rotation < -100:
        rotation = np.rad2deg(np.arctan2(pt1.y - pt2.y, pt1.x - pt2.x))
    elif rotation >= 170:
        rotation = rotation - 180
    return rotation


[docs]def convert_polygon_to_grid(in_shapefilename,
                            out_rasterfilename,
                            pixel_size,
                            nodata_val=-9999,
                            snap_extent_to_pixel=True,
                            overwrite=True):
    """ Convert a polygon shapefile to a raster for a set pixel size.

    source : http://ceholden.github.io/open-geo-tutorial/python/chapter_4_vector.html

    Args:
        in_shapefilename (str): A polygon shapefile to rasterise
        out_rasterfilename (str): the output raster name
        pixel_size (float): the pixel size for the raster
        nodata_val (int):the value to use for nodata
        snap_extent_to_pixel (bool):round the extent coordinates to be divisible by the pixel size
        overwrite (bool): if true overwrite existing output file

    Returns:
        None:
    """

    start_time = time.time()

    if not os.path.exists(in_shapefilename):
        raise IOError("Invalid path: {}".format(in_shapefilename))

    shp_ds = ogr.Open(in_shapefilename)
    shp_layer = shp_ds.GetLayer()

    x_min, x_max, y_min, y_max = shp_layer.GetExtent()

    LOGGER.info('\tVector Extent: LL:{} {}'.format(x_min, y_min))
    LOGGER.info('\t               UR:{} {}'.format(x_max, y_max))

    # Snap output grids to a multiple of the grid size, allowing adjacent blocks to align nicely.
    if snap_extent_to_pixel:
        x_min, y_min, x_max, y_max = raster_snap_extent(x_min, y_min, x_max, y_max, pixel_size)
        LOGGER.info('\tRaster Extent: Snap is {}'.format(snap_extent_to_pixel))
        LOGGER.info('\t           LL:{} {}'.format(x_min, y_min))
        LOGGER.info('\t           UR:{} {}'.format(x_max, y_max))

    x_cols = int((x_max - x_min) / pixel_size)
    y_rows = int((y_max - y_min) / pixel_size)
    LOGGER.info('\t           Cols: {}'.format(x_cols))
    LOGGER.info('\t           Rows: {}'.format(y_rows))

    # for pixel data types see http://www.gdal.org/gdal_8h.html#a22e22ce0a55036a96f652765793fb7a4
    tif_drv = gdal.GetDriverByName('GTiff')
    if tif_drv is None:
        # If the above Fails then here is an alternate version.
        tif_drv = Find_GDALDriverByName('GTiff')

    raster_ds = tif_drv.Create(out_rasterfilename, x_cols, y_rows, 1, gdal.GDT_Int16)

    # Define spatial reference
    raster_ds.SetProjection(shp_layer.GetSpatialRef().ExportToWkt())
    ras_y_max = y_min + (y_rows * pixel_size)
    raster_ds.SetGeoTransform((x_min, pixel_size, 0, ras_y_max, 0, -pixel_size))

    r_band = raster_ds.GetRasterBand(1)
    r_band.SetNoDataValue(nodata_val)
    r_band.Fill(nodata_val)

    # Rasterize the shapefile layer to our new dataset
    # options = ['ALL_TOUCHED=TRUE','ATTRIBUTE=id']
    err = gdal.RasterizeLayer(raster_ds,  # output to our new dataset
                              [1],  # output to our new datasets first band
                              shp_layer,  # rasterize this layer
                              None, None,  # ignore transformations since we're in same projection
                              burn_values=[1],  # burn value 1
                              options=['ALL_TOUCHED=FALSE'])

    if err != 0:
        LOGGER.exception("Error rasterizing layer: {}".format(err), True)

    else:
        LOGGER.info('{:<30}\t{dur:<15}\t{}'.format(inspect.currentframe().f_code.co_name, '',
                                                   dur=timedelta(seconds=time.time() - start_time)))

    # close the data sources
    r_band = None
    raster_ds = None
    shp_layer = None
    shp_ds = None

    return


[docs]def convert_grid_to_vesper(in_rasterfilename, out_vesperfilename):
    """ Convert a Grid the format compatible with vesper.

       The vesper format contains the xy coordinates for the center of each pixel with one space
       before the x and three spaces between the x and y values.

       The coordinates are written as rows from the Upper Left corner of the raster.

       Args:
           in_rasterfilename (str):An input raster file
           out_vesperfilename (str): Save the output vesper file to this file.

    """
    start_time = time.time()

    if not os.path.exists(in_rasterfilename):
        raise IOError("Invalid path: {}".format(in_rasterfilename))

    import rasterio  # for some reason this works better here than at the top
    with rasterio.open(os.path.normpath(in_rasterfilename)) as oRasterDS:
        band1 = oRasterDS.read(1)
        with open(out_vesperfilename, 'w') as vesFile:
            for iRowY in range(oRasterDS.height):  # get starting row
                for iColX in (range(oRasterDS.width)):  # process through cols
                    val = band1[iRowY, iColX]
                    if val > oRasterDS.nodatavals:
                        # Get XY coordinates for current row/col
                        # takes rows,cols and returns xy coord for cell center
                        xy = oRasterDS.xy(iRowY, iColX)
                        vesFile.write(' {}   {}\n'.format(xy[0], xy[1]))

    LOGGER.info('{:<30}\t{dur:<15}\t{}'.format(inspect.currentframe().f_code.co_name, '',
                                               dur=timedelta(seconds=time.time() - start_time)))
    return


[docs]def add_point_geometry_to_dataframe(in_dataframe, coord_columns=None,
                                    coord_columns_epsg=0, out_epsg=0):
    """ Add Point geometry to a pandas or geopandas data frame through sets of coordinate columns.
       If required, it will also reproject to the chosen EPSG coordinate system

    Args:
        in_dataframe (DataFrame): The input dataframe. This could be a pandas or geopandas dataframe
        coord_columns (list) : A list of the columns representing coordinates. If None, it will be
                      predicted.
        coord_columns_epsg (int):  The EPSG number representing the coordinates inside the csv file
        out_epsg (int): The EPSG number representing the required output coordinates system.
                        Use  0 for no projection
    Returns:
        (GeoDataFrame): A geodataframe of Points with ALL attributes.

    Modified from: https://gis.stackexchange.com/a/182234

    """
    start_time = time.time()

    if not isinstance(in_dataframe, (geopandas.GeoDataFrame, pd.DataFrame)):
        raise TypeError("Input points should be a geopandas dataframe")

    for argCheck in [('coord_columns_epsg', coord_columns_epsg), ('out_epsg', out_epsg)]:
        if not isinstance(argCheck[1], (int, long)):
            raise TypeError('{} must be a integer.'.format(argCheck[0]))

    if coord_columns is None:
        coord_columns = []

    if not isinstance(coord_columns, list):
        raise TypeError('Coordinate columns should be a list.'.format(coord_columns))

    elif len(coord_columns) > 0:
        for eaFld in coord_columns:
            if eaFld not in in_dataframe.columns:
                raise TypeError('Column {} does not exist'.format(eaFld))
        x_column, y_column = coord_columns

    else:
        x_column, y_column = predictCoordinateColumnNames(in_dataframe.columns)

    if 'FID' not in in_dataframe.columns:
        # insert feature id as column as first column and populate it using row number.
        in_dataframe.insert(0, 'FID', in_dataframe.index)

    # combine lat and lon column to a shapely Point() object
    in_dataframe['geometry'] = in_dataframe.apply(
        lambda x: Point((float(x[x_column]), float(x[y_column]))), axis=1)

    # Now, convert the pandas DataFrame into a GeoDataFrame. The geopandas constructor expects a
    # geometry column which can consist of shapely geometry objects, so the column we created is
    # just fine:
    gdf_csv = geopandas.GeoDataFrame(in_dataframe, geometry='geometry',
                                     crs=from_epsg(coord_columns_epsg))

    # drop the original geometry columns to avoid confusion
    gdf_csv.drop([x_column, y_column], axis=1, inplace=True)

    gdf_crs = pyprecag_crs.crs()
    gdf_crs.getFromEPSG(coord_columns_epsg)

    if out_epsg == -1:
        xmin, ymin, _, _ = gdf_csv.total_bounds
        out_epsg = pyprecag_crs.getProjectedCRSForXY(xmin, ymin, coord_columns_epsg).epsg_number

    if out_epsg > 0:
        gdf_csv = gdf_csv.to_crs(epsg=out_epsg)
        gdf_crs.getFromEPSG(out_epsg)

    return gdf_csv, gdf_crs


[docs]def convert_csv_to_points(in_csvfilename, out_shapefilename=None, coord_columns=None,
                          coord_columns_epsg=4326, out_epsg=0):
    """ Converts a comma or tab delimited file to a shapefile.

    If EPSG is omitted it will default to 0 or unknown.

    Args:
        in_csvfilename (str):   Input CSV or Txt (tab) file.
        out_shapefilename (str):  Output Shapefile Name. If path is excluded it uses TEMPDIR
        coord_columns (list) : Column list representing coordinates. If None, it will be predicted.
        coord_columns_epsg (int):  EPSG number for specified coord_columns. Default is 4326 (WGS84)
        out_epsg (int):  The EPSG number representing the required output coordinates system.
                        - OR -
                        0  is no reprojection
                        -1 calculate utm zonal projection

    Returns:
        (GeoDataFrame): A geodataframe of Points with ALL attributes.
        (pyprecag_crs): The coordinate system details of the dataframe

    Modified from:  https://gis.stackexchange.com/a/182234

    """
    start_time = time.time()

    if not os.path.exists(in_csvfilename):
        raise IOError("Invalid path: {}".format(in_csvfilename))

    for argCheck in [('coord_columns_epsg', coord_columns_epsg), ('out_epsg', out_epsg)]:
        if not isinstance(argCheck[1], (int, long)):
            raise TypeError('{} must be a integer.'.format(argCheck[0]))

    desc_csv = CsvDescribe(in_csvfilename)
    pdf_csv = desc_csv.open_pandas_dataframe()

    gdf_csv, gdf_crs = add_point_geometry_to_dataframe(pdf_csv, coord_columns,
                                                       coord_columns_epsg, out_epsg)

    if config.get_debug_mode() or out_shapefilename is not None:
        if out_shapefilename is None:
            out_shapefilename = '{}_{}.shp'.format(
                os.path.splitext(os.path.basename(in_csvfilename))[0],
                inspect.getframeinfo(inspect.currentframe())[2])

        save_geopandas_tofile(gdf_csv, out_shapefilename, overwrite=True)

    LOGGER.info('{:<30}\t{dur:<15}\t{}'.format(inspect.currentframe().f_code.co_name, '',
                                               dur=timedelta(seconds=time.time() - start_time)))

    return gdf_csv, gdf_crs


[docs]def convert_polygon_feature_to_raster(feature, pixel_size, value=1, nodata_val=-9999,
                                      snap_extent_to_pixel=True):
    """ Convert a feature into a raster (block grid)

    The "value" argument can be a numeric, or the name of a feature column. If the feature column
    is a string, then the index value will be used to assign a value to the raster.

    By default, snap_extent_to_pixel is true and is used to snap the bounding coordinates to
    a pixel edge.

    Args:
       feature (pandas.core.series.Series): a polygon feature to convert to a raster
       pixel_size (int, float]: the pixel size for the raster
       value (int, float, str): pixel value. A column from the feature can be used
       nodata_val (int, float): nodata value
       snap_extent_to_pixel (bool): round the extent coordinates to be divisible by the pixel size

    Returns:
       burned (numpy.ndarray) : The array representing the raster data
       Dict[str, int]] : The rasterio meta kwargs required for writing the array to disk.
    """
    if not isinstance(feature, (pd.Series, geopandas.GeoSeries)):
        raise TypeError("Input feature should be a geopandas series")

    if not isinstance(pixel_size, (int, long, float)):
        raise TypeError('Pixel size must be an integer or floating number.')

    if isinstance(value, str) and 'value' not in list(feature.index):
        raise TypeError('Value string {} is not a column name')
    elif not isinstance(value, (int, long, float)):
        raise TypeError('Value should be a column name, or a number')

    transform, width, height, bbox = create_raster_transform(feature.geometry.bounds,
                                                             pixel_size, snap_extent_to_pixel)

    gdf_feat = GeoDataFrame(GeoSeries(feature.geometry),
                            geometry=GeoSeries(feature.geometry).geometry)

    # create an affine transformation matrix to associate the array to the coordinates.
    if value in gdf_feat.columns:
        if is_string_dtype(gdf_feat[value]):
            shapes = ((geom, val) for geom, val in zip(gdf_feat.geometry, (gdf_feat.index + 1)))
        else:
            shapes = ((geom, val) for geom, val in zip(gdf_feat.geometry, gdf_feat[value]))
    else:
        shapes = ((geom, value) for geom in gdf_feat.geometry)

    burned = features.rasterize(shapes=shapes, out_shape=(height, width), fill=nodata_val,
                                transform=transform)
    meta = {}
    meta.update(height=height, width=width,
                transform=transform,
                nodata=nodata_val,
                dtype=rasterio.dtypes.get_minimum_dtype(burned))

    return burned, meta