# -*- coding: utf-8 -*-
"""
Format conversion routines.
"""
from datetime import timedelta
import inspect
import logging
import math
import os
import time
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_string_dtype
from osgeo import ogr, gdal
import geopandas
from geopandas import GeoDataFrame, GeoSeries
import rasterio
from rasterio import features
from fiona import collection as fionacoll
from fiona.crs import from_epsg
from shapely.geometry import Point, mapping, shape, LineString
from . import crs as pyprecag_crs
from . import TEMPDIR, config
from .describe import CsvDescribe, predictCoordinateColumnNames, VectorDescribe, \
save_geopandas_tofile
from .errors import GeometryError
from .raster_ops import raster_snap_extent, create_raster_transform
LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())
def numeric_pixelsize_to_string(pixelsize_metres):
    """
    Convert a numeric pixel size to a string with the appropriate units for use in file names.

    For example 0.40 is 40cm, 0.125 is 125mm, 1.5km is 1500m, 2.0 is 2m, 6000 is 6km

    Args:
        pixelsize_metres (numeric): The pixel size in meters.

    Returns (str): a string representation of the pixel size
    """
    if isinstance(pixelsize_metres, str):
        raise ValueError('Pixel size should be numeric (int, float etc)')

    # Work from the largest unit down to the smallest that divides evenly.
    if pixelsize_metres % 1000.0 == 0:
        # whole kilometres
        return '{}km'.format(int(pixelsize_metres / 1000))
    if pixelsize_metres % 1 == 0:
        # whole metres
        return '{}m'.format(int(pixelsize_metres))
    if (pixelsize_metres * 100.0) % 1 == 0:
        # whole centimetres
        return '{}cm'.format(int(pixelsize_metres * 100))
    # fall back to millimetres
    return '{}mm'.format(int(pixelsize_metres * 1000))
def point_to_point_bearing(pt1, pt2):
    """Calculate bearing from north between two points """
    # variation of https://community.esri.com/thread/27393
    if isinstance(pt1, tuple):
        pt1 = Point(pt1)
    if isinstance(pt2, tuple):
        pt2 = Point(pt2)

    delta_x = pt2.x - pt1.x
    delta_y = pt2.y - pt1.y

    # atan2 over (easting, northing) gives the angle from north; the +180
    # offset matches the original implementation's convention.
    return 180 + math.atan2(delta_x, delta_y) * (180 / math.pi)
def line_bearing(line):
    """ The bearing on the line between the start and end nodes/points"""
    # accept either a raw LineString or a feature carrying one as .geometry
    geom = line if isinstance(line, LineString) else line.geometry

    # bearing is measured from the last vertex back to the first
    return point_to_point_bearing(geom.coords[-1], geom.coords[0])
def deg_to_16_compass_pts(degrees):
    """
    Convert a compass bearing from north using 16 cardinal directions

    Args:
        degrees (float): Degrees from north

    Returns:
        direction(str) : letters representing the compass point

    source: https://stackoverflow.com/a/7490772
    """
    # Normalise to [0, 360) so any negative bearing or bearing beyond a full
    # circle maps onto the compass (e.g. -45 -> 315, -405 -> 315).
    # The original `if degrees < 0: degrees += 360` only handled -360..0.
    degrees = degrees % 360.0
    # each of the 16 sectors spans 22.5 degrees; +.5 centres the sector on its point
    val = int((degrees / 22.5) + .5)
    arr = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW",
           "NW", "NNW"]
    return arr[(val % 16)]
def deg_to_8_compass_pts(degrees):
    """
    Convert a compass bearing from north using 8 cardinal directions

    Args:
        degrees (float): Degrees from north

    Returns:
        direction(str) : letters representing the compass point

    source: variation of https://stackoverflow.com/a/7490772
    """
    # Normalise to [0, 360) so any negative bearing or bearing beyond a full
    # circle maps onto the compass (e.g. -90 -> 270, -450 -> 270).
    # The original `if degrees < 0: degrees += 360` only handled -360..0.
    degrees = degrees % 360.0
    # each of the 8 sectors spans 45 degrees; +.5 centres the sector on its point
    val = int((degrees / 45.0) + .5)
    arr = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
    return arr[(val % 8)]
def drop_z(geom):
    """ Return a 2D copy of a (possibly 3D) shapely LineString or Point.

    Any other geometry type falls through and returns None (unchanged from
    the original behaviour).
    """
    if isinstance(geom, LineString):
        # keep only x,y from every vertex
        return LineString([xy[0:2] for xy in list(geom.coords)])
    if isinstance(geom, Point):
        # geom.coords[0] is the single (x, y[, z]) vertex; slice off z.
        # (was Point([geom.coords[0:2]]), which sliced the coordinate
        # *sequence* rather than the tuple and never removed the z value)
        return Point(geom.coords[0][0:2])
def text_rotation(pt1, pt2):
    # calculate an angle to use for labelling in matplotlib plots
    # https://stackoverflow.com/a/35825527
    angle = np.rad2deg(np.arctan2(pt2.y - pt1.y, pt2.x - pt1.x))

    # tweak the angle slightly so labels never render upside down
    if angle < -100:
        # flip direction by measuring from pt2 back to pt1 instead
        angle = np.rad2deg(np.arctan2(pt1.y - pt2.y, pt1.x - pt2.x))
    elif angle >= 170:
        angle -= 180

    return angle
def convert_polygon_to_grid(in_shapefilename,
                            out_rasterfilename,
                            pixel_size,
                            nodata_val=-9999,
                            snap_extent_to_pixel=True,
                            overwrite=True):
    """ Convert a polygon shapefile to a raster for a set pixel size.

    source : http://ceholden.github.io/open-geo-tutorial/python/chapter_4_vector.html

    Args:
        in_shapefilename (str): A polygon shapefile to rasterise
        out_rasterfilename (str): the output raster name
        pixel_size (float): the pixel size for the raster
        nodata_val (int): the value to use for nodata
        snap_extent_to_pixel (bool): round the extent coordinates to be divisible by the pixel size
        overwrite (bool): if true overwrite existing output file

    Returns:
        None:
    """
    start_time = time.time()

    if not os.path.exists(in_shapefilename):
        raise IOError("Invalid path: {}".format(in_shapefilename))

    shp_ds = ogr.Open(in_shapefilename)
    shp_layer = shp_ds.GetLayer()

    x_min, x_max, y_min, y_max = shp_layer.GetExtent()

    LOGGER.info('\tVector Extent: LL:{} {}'.format(x_min, y_min))
    LOGGER.info('\t UR:{} {}'.format(x_max, y_max))

    # Snap output grids to a multiple of the grid size, allowing adjacent blocks to align nicely.
    if snap_extent_to_pixel:
        x_min, y_min, x_max, y_max = raster_snap_extent(x_min, y_min, x_max, y_max, pixel_size)
    LOGGER.info('\tRaster Extent: Snap is {}'.format(snap_extent_to_pixel))
    LOGGER.info('\t LL:{} {}'.format(x_min, y_min))
    LOGGER.info('\t UR:{} {}'.format(x_max, y_max))

    x_cols = int((x_max - x_min) / pixel_size)
    y_rows = int((y_max - y_min) / pixel_size)
    LOGGER.info('\t Cols: {}'.format(x_cols))
    LOGGER.info('\t Rows: {}'.format(y_rows))

    # for pixel data types see http://www.gdal.org/gdal_8h.html#a22e22ce0a55036a96f652765793fb7a4
    tif_drv = gdal.GetDriverByName('GTiff')
    if tif_drv is None:
        # The original fallback called Find_GDALDriverByName, which is not
        # defined anywhere and would raise NameError; fail clearly instead.
        raise RuntimeError('GDAL GTiff driver is not available')

    raster_ds = tif_drv.Create(out_rasterfilename, x_cols, y_rows, 1, gdal.GDT_Int16)

    # Define spatial reference
    raster_ds.SetProjection(shp_layer.GetSpatialRef().ExportToWkt())
    ras_y_max = y_min + (y_rows * pixel_size)
    raster_ds.SetGeoTransform((x_min, pixel_size, 0, ras_y_max, 0, -pixel_size))

    r_band = raster_ds.GetRasterBand(1)
    r_band.SetNoDataValue(nodata_val)
    r_band.Fill(nodata_val)

    # Rasterize the shapefile layer to our new dataset
    # options = ['ALL_TOUCHED=TRUE','ATTRIBUTE=id']
    err = gdal.RasterizeLayer(raster_ds,  # output to our new dataset
                              [1],  # output to our new datasets first band
                              shp_layer,  # rasterize this layer
                              None, None,  # ignore transformations since we're in same projection
                              burn_values=[1],  # burn value 1
                              options=['ALL_TOUCHED=FALSE'])

    if err != 0:
        # Was LOGGER.exception(msg, True): the stray positional `True` was
        # treated as a %-formatting argument and broke the log record.
        LOGGER.exception("Error rasterizing layer: {}".format(err))
    else:
        LOGGER.info('{:<30}\t{dur:<15}\t{}'.format(inspect.currentframe().f_code.co_name, '',
                                                   dur=timedelta(seconds=time.time() - start_time)))

    # close the data sources to flush and release file handles
    r_band = None
    raster_ds = None
    shp_layer = None
    shp_ds = None

    return
def convert_grid_to_vesper(in_rasterfilename, out_vesperfilename):
    """ Convert a Grid the format compatible with vesper.

    The vesper format contains the xy coordinates for the center of each pixel with one space
    before the x and three spaces between the x and y values.
    The coordinates are written as rows from the Upper Left corner of the raster.

    Args:
        in_rasterfilename (str):An input raster file
        out_vesperfilename (str): Save the output vesper file to this file.
    """
    start_time = time.time()

    if not os.path.exists(in_rasterfilename):
        raise IOError("Invalid path: {}".format(in_rasterfilename))

    # NOTE: the redundant function-level `import rasterio` was removed;
    # rasterio is already imported at module level.
    with rasterio.open(os.path.normpath(in_rasterfilename)) as oRasterDS:
        band1 = oRasterDS.read(1)
        # rasterio's nodatavals is a per-band *tuple*; the original compared
        # `val > oRasterDS.nodatavals` against the whole tuple rather than
        # band 1's nodata value.
        nodata = oRasterDS.nodatavals[0]
        with open(out_vesperfilename, 'w') as vesFile:
            for iRowY in range(oRasterDS.height):  # get starting row
                for iColX in range(oRasterDS.width):  # process through cols
                    val = band1[iRowY, iColX]
                    if val > nodata:
                        # Get XY coordinates for current row/col
                        # takes rows,cols and returns xy coord for cell center
                        xy = oRasterDS.xy(iRowY, iColX)
                        vesFile.write(' {} {}\n'.format(xy[0], xy[1]))

    LOGGER.info('{:<30}\t{dur:<15}\t{}'.format(inspect.currentframe().f_code.co_name, '',
                                               dur=timedelta(seconds=time.time() - start_time)))
    return
def add_point_geometry_to_dataframe(in_dataframe, coord_columns=None,
                                    coord_columns_epsg=0, out_epsg=0):
    """ Add Point geometry to a pandas or geopandas data frame through sets of coordinate columns.
    If required, it will also reproject to the chosen EPSG coordinate system

    Args:
        in_dataframe (DataFrame): The input dataframe. This could be a pandas or geopandas dataframe
        coord_columns (list) : A list of the columns representing coordinates. If None, it will be
                               predicted.
        coord_columns_epsg (int): The EPSG number representing the coordinates inside the csv file
        out_epsg (int): The EPSG number representing the required output coordinates system.
                        Use 0 for no projection

    Returns:
        (GeoDataFrame): A geodataframe of Points with ALL attributes.

    Modified from: https://gis.stackexchange.com/a/182234
    """
    if not isinstance(in_dataframe, (geopandas.GeoDataFrame, pd.DataFrame)):
        raise TypeError("Input points should be a geopandas dataframe")

    for argCheck in [('coord_columns_epsg', coord_columns_epsg), ('out_epsg', out_epsg)]:
        if not isinstance(argCheck[1], (int, long)):
            raise TypeError('{} must be a integer.'.format(argCheck[0]))

    if coord_columns is None:
        coord_columns = []

    if not isinstance(coord_columns, list):
        # was: 'Coordinate columns should be a list.'.format(coord_columns) -
        # the .format() had no placeholder so the offending value never showed.
        raise TypeError('Coordinate columns should be a list. Got {}'.format(coord_columns))
    elif len(coord_columns) > 0:
        for eaFld in coord_columns:
            if eaFld not in in_dataframe.columns:
                raise TypeError('Column {} does not exist'.format(eaFld))
        x_column, y_column = coord_columns
    else:
        x_column, y_column = predictCoordinateColumnNames(in_dataframe.columns)

    if 'FID' not in in_dataframe.columns:
        # insert feature id as column as first column and populate it using row number.
        in_dataframe.insert(0, 'FID', in_dataframe.index)

    # combine lat and lon column to a shapely Point() object
    in_dataframe['geometry'] = in_dataframe.apply(
        lambda x: Point((float(x[x_column]), float(x[y_column]))), axis=1)

    # Now, convert the pandas DataFrame into a GeoDataFrame. The geopandas constructor expects a
    # geometry column which can consist of shapely geometry objects, so the column we created is
    # just fine:
    gdf_csv = geopandas.GeoDataFrame(in_dataframe, geometry='geometry',
                                     crs=from_epsg(coord_columns_epsg))

    # drop the original geometry columns to avoid confusion
    gdf_csv.drop([x_column, y_column], axis=1, inplace=True)

    gdf_crs = pyprecag_crs.crs()
    gdf_crs.getFromEPSG(coord_columns_epsg)

    if out_epsg == -1:
        # -1 means derive a zonal (projected) EPSG from the data's lower-left corner
        xmin, ymin, _, _ = gdf_csv.total_bounds
        out_epsg = pyprecag_crs.getProjectedCRSForXY(xmin, ymin, coord_columns_epsg).epsg_number

    if out_epsg > 0:
        gdf_csv = gdf_csv.to_crs(epsg=out_epsg)
        gdf_crs.getFromEPSG(out_epsg)

    return gdf_csv, gdf_crs
def convert_csv_to_points(in_csvfilename, out_shapefilename=None, coord_columns=None,
                          coord_columns_epsg=4326, out_epsg=0):
    """ Converts a comma or tab delimited file to a shapefile.

    If EPSG is omitted it will default to 0 or unknown.

    Args:
        in_csvfilename (str): Input CSV or Txt (tab) file.
        out_shapefilename (str): Output Shapefile Name. If path is excluded it uses TEMPDIR
        coord_columns (list) : Column list representing coordinates. If None, it will be predicted.
        coord_columns_epsg (int): EPSG number for specified coord_columns. Default is 4326 (WGS84)
        out_epsg (int): The EPSG number representing the required output coordinates system.
            - OR -
            0 is no reprojection
            -1 calculate utm zonal projection

    Returns:
        (GeoDataFrame): A geodataframe of Points with ALL attributes.
        (pyprecag_crs): The coordinate system details of the dataframe

    Modified from: https://gis.stackexchange.com/a/182234
    """
    start_time = time.time()

    if not os.path.exists(in_csvfilename):
        raise IOError("Invalid path: {}".format(in_csvfilename))

    # both EPSG arguments must be whole numbers
    for arg_name, arg_value in [('coord_columns_epsg', coord_columns_epsg),
                                ('out_epsg', out_epsg)]:
        if not isinstance(arg_value, (int, long)):
            raise TypeError('{} must be a integer.'.format(arg_name))

    csv_descriptor = CsvDescribe(in_csvfilename)
    points_gdf, points_crs = add_point_geometry_to_dataframe(
        csv_descriptor.open_pandas_dataframe(), coord_columns, coord_columns_epsg, out_epsg)

    # write a shapefile when explicitly requested, or always while debugging
    if config.get_debug_mode() or out_shapefilename is not None:
        if out_shapefilename is None:
            # default name: <csv basename>_<this function's name>.shp
            out_shapefilename = '{}_{}.shp'.format(
                os.path.splitext(os.path.basename(in_csvfilename))[0],
                inspect.getframeinfo(inspect.currentframe())[2])
        save_geopandas_tofile(points_gdf, out_shapefilename, overwrite=True)

    LOGGER.info('{:<30}\t{dur:<15}\t{}'.format(inspect.currentframe().f_code.co_name, '',
                                               dur=timedelta(seconds=time.time() - start_time)))
    return points_gdf, points_crs
def convert_polygon_feature_to_raster(feature, pixel_size, value=1, nodata_val=-9999,
                                      snap_extent_to_pixel=True):
    """ Convert a feature into a raster (block grid)

    The "value" argument can be a numeric, or the name of a feature column. If the feature column
    is a string, then the index value will be used to assign a value to the raster.

    By default, snap_extent_to_pixel is true and is used to snap the bounding coordinates to
    a pixel edge.

    Args:
        feature (pandas.core.series.Series): a polygon feature to convert to a raster
        pixel_size (int, float): the pixel size for the raster
        value (int, float, str): pixel value. A column from the feature can be used
        nodata_val (int, float): nodata value
        snap_extent_to_pixel (bool): round the extent coordinates to be divisible by the pixel size

    Returns:
        burned (numpy.ndarray) : The array representing the raster data
        Dict[str, int]] : The rasterio meta kwargs required for writing the array to disk.
    """
    if not isinstance(feature, (pd.Series, geopandas.GeoSeries)):
        raise TypeError("Input feature should be a geopandas series")

    if not isinstance(pixel_size, (int, long, float)):
        raise TypeError('Pixel size must be an integer or floating number.')

    if isinstance(value, str):
        # Was: `'value' not in list(feature.index)` - it checked for the
        # literal string 'value' instead of the argument, and a valid column
        # name then fell through to the numeric check below and raised anyway.
        if value not in list(feature.index):
            raise TypeError('Value string {} is not a column name'.format(value))
    elif not isinstance(value, (int, long, float)):
        raise TypeError('Value should be a column name, or a number')

    # create an affine transformation matrix to associate the array to the coordinates.
    transform, width, height, bbox = create_raster_transform(feature.geometry.bounds,
                                                             pixel_size, snap_extent_to_pixel)

    gdf_feat = GeoDataFrame(GeoSeries(feature.geometry),
                            geometry=GeoSeries(feature.geometry).geometry)

    # NOTE(review): gdf_feat is built from the geometry only, so a column named
    # by `value` can never appear in gdf_feat.columns and the first branch
    # below never fires - looks unintended; confirm with callers before changing.
    if value in gdf_feat.columns:
        if is_string_dtype(gdf_feat[value]):
            # string columns can't be burned directly; use the (1-based) index
            shapes = ((geom, val) for geom, val in zip(gdf_feat.geometry, (gdf_feat.index + 1)))
        else:
            shapes = ((geom, val) for geom, val in zip(gdf_feat.geometry, gdf_feat[value]))
    else:
        # burn the constant value into every geometry
        shapes = ((geom, value) for geom in gdf_feat.geometry)

    burned = features.rasterize(shapes=shapes, out_shape=(height, width), fill=nodata_val,
                                transform=transform)

    meta = {}
    meta.update(height=height, width=width,
                transform=transform,
                nodata=nodata_val,
                dtype=rasterio.dtypes.get_minimum_dtype(burned))

    return burned, meta