In [37]:
# Tutorial for using rasterio and numpy to extract the footprint of a band
# mask and export it as a GeoJSON file to serve as a gdalwarp cutline.
# 
# Rasterio functions used are open(), read_mask(), sieve(), shapes()
#
# See the question at:
#
# http://gis.stackexchange.com/questions/90481/mask-image-based-on-no-data-pixels-in-another-image-with-different-projection
#
# Rasterio's home is https://github.com/mapbox/rasterio.
 
import rasterio

# Matplotlib is only used to make the pictures. Not required for processing.
import matplotlib.pyplot as plt

# I'll use rasterio's test image in this tutorial. It is a UTM Zone 18
# RGB GeoTIFF containing a swath of imagery on a 0,0,0 background. 
# Some valid image values are also 0, which gives me an opportunity to
# show how another rasterio feature can eliminate small holes in the image's
# valid data mask.
In [38]:
# First, get the GDAL band mask from rasterio's test image.
with rasterio.open('rasterio/tests/data/RGB.byte.tif') as src:
    mask = src.read_mask()

plt.imshow(mask)
Out[38]:
<matplotlib.image.AxesImage at 0x115e7ced0>
In [39]:
# Red is valid data, blue nodata.
# Oh look, there's a defect in the mask. I want a simple polygon footprint,
# so I'll sieve out those holes.

import numpy
from rasterio.features import sieve

sieved_mask = sieve(mask, 500)
plt.imshow(sieved_mask)
Out[39]:
<matplotlib.image.AxesImage at 0x115eb21d0>
In [40]:
# Now I'll call shapes(), which uses GDAL's polygonizer, to get the shape
# of the valid data area (value of 255) in the units of the input file.
# Shapes() returns an iterator over features of the image. There's only
# one with a non-zero value in this case.

from rasterio.features import shapes

geom = next(g for g, v in shapes(sieved_mask, transform=src.transform) if v > 0)     
In [41]:
print geoms.keys()
print geoms[255].keys()
[0, 255]
['type', 'coordinates']
In [42]:
# Make a GeoJSON file with one feature and the EPSG:26918 crs.
import json
f = {'type': 'Feature', 'id': '255', 'properties': {}, 'geometry': geoms[255]}
crs = {'type': 'name', 'properties': {'name': 'urn:ogc:def:crs:epsg::26918'}}
text = json.dumps({'type': 'FeatureCollection', 'crs': crs, 'features': [f]})
with open('/tmp/footprint.json', 'w') as dst:
    dst.write(text)
In [43]:
# Verify with ogrinfo that this will work as a cutline.
import subprocess
print subprocess.check_output(['ogrinfo', '-so', '/tmp/footprint.json', 'OGRGeoJSON'])
Had to open data source read-only.
INFO: Open of `/tmp/footprint.json'
      using driver `GeoJSON' successful.

Layer name: OGRGeoJSON
Geometry: Polygon
Feature Count: 1
Extent: (105885.493047, 2612685.167131) - (333014.203540, 2826014.874652)
Layer SRS WKT:
PROJCS["NAD83 / UTM zone 18N",
    GEOGCS["NAD83",
        DATUM["North_American_Datum_1983",
            SPHEROID["GRS 1980",6378137,298.257222101,
                AUTHORITY["EPSG","7019"]],
            TOWGS84[0,0,0,0,0,0,0],
            AUTHORITY["EPSG","6269"]],
        PRIMEM["Greenwich",0,
            AUTHORITY["EPSG","8901"]],
        UNIT["degree",0.0174532925199433,
            AUTHORITY["EPSG","9122"]],
        AUTHORITY["EPSG","4269"]],
    PROJECTION["Transverse_Mercator"],
    PARAMETER["latitude_of_origin",0],
    PARAMETER["central_meridian",-75],
    PARAMETER["scale_factor",0.9996],
    PARAMETER["false_easting",500000],
    PARAMETER["false_northing",0],
    UNIT["metre",1,
        AUTHORITY["EPSG","9001"]],
    AXIS["Easting",EAST],
    AXIS["Northing",NORTH],
    AUTHORITY["EPSG","26918"]]

In [43]: