Data Preparation 

&
Vector Geoprocessing

These code snippets demonstrate how to automate geospatial workflows using Python, Pandas, and GeoPandas. It merges soil survey data from multiple feature classes, joins tabular attributes, and intersects the combined dataset with watershed boundaries. Finally, it calculates the number of polygons within each watershed—illustrating a typical vector-based geoprocessing routine for environmental or land-use analyses. 

import geopandas as gpd

import pandas as pd


# Part I

    # Join each of the tables to the corresponding feature class. Find a common field to join on.

    # Add a new field named mapunit to each joined feature class and calculate the values for all features as the map unit ID of the feature class.

    # Merge the 9 feature classes into a new feature class.

    # Intersect the merged feature class with the watershed boundaries.


# Prepare suffixes for feature classes and tables

suffixes = ['co001', 'co618', 'co641', 'co642', 'co643', 'co644', 'co645', 'co651', 'co653']


# Create an empty list to store GeoDataFrames

gdfs = []


# Loop through suffixes to join feature classes with corresponding tables

for suffix in suffixes:

    # Load feature class (spatial data)

    soil_fc = gpd.read_file("Lab 2/lab2.gpkg", layer=f"soilmu_a_{suffix}")


    # Load table (attribute data)

    soil_table = gpd.read_file("Lab 2/lab2.gpkg", layer=f"muaggatt_{suffix}")


    # Perform join using MUSYM (uppercase) and musym (lowercase) as common fields

    joined_fc = soil_fc.merge(soil_table, left_on='MUSYM', right_on='musym', how='left')


    # Add 'mapunit' field with the suffix (e.g., 'co641')

    joined_fc['mapunit'] = suffix


    # Append joined GeoDataFrame to list

    gdfs.append(joined_fc)


# Merge all GeoDataFrames into one

merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=gdfs[0].crs)


# Load watershed boundaries

watersheds = gpd.read_file("Lab 2/lab2.gpkg", layer="wbdhu8_lab1")


# Perform intersection between SSURGO polygons and watershed boundaries

intersected = gpd.overlay(merged_gdf, watersheds, how='intersection')


# Part II

    # Find the number of features in the resulting intersected feature class that correspond to each watershed.

    # Print these counts.


# Count features per watershed

counts = intersected['NAME'].value_counts()


# Display final results

print("\nFinal Count of SSURGO Polygons per Watershed:")

for watershed, count in counts.items():

    print(f"{watershed}: {count} polygons")

print() # Formatting