Examples: Weather Data Access¶
This page contains examples of using DataIO to access and download weather data with spatial and temporal filtering.
Setting up the Client¶
from dataio import DataIOAPI

# Method 1: Using environment variables (recommended)
# The constructor is called with no arguments; the settings are supplied
# through the DATAIO_* environment variables (for example from a .env file).
client = DataIOAPI()

# Method 2: Passing credentials directly
# Only one of the two methods is needed - this assignment replaces the
# client created above.
client = DataIOAPI(
    base_url="https://dataio.artpark.ai/api/v1",
    api_key="your_api_key_here",  # placeholder - substitute your own key
    data_dir="data"
)
For Method 1, create a .env file in your project root:
DATAIO_API_BASE_URL=https://dataio.artpark.ai/api/v1
DATAIO_API_KEY=your_api_key_here
DATAIO_DATA_DIR=data
Discovering Available Weather Datasets¶
List all available weather datasets to see what’s accessible:
# List every weather dataset the client reports and print its metadata.
datasets = client.list_weather_datasets()
print(f"Found {len(datasets)} weather dataset(s)\n")

for dataset in datasets:
    print(f"Dataset: {dataset['dataset_name']}")
    print(f"Time coverage: {dataset['temporal_coverage_start']} to {dataset['temporal_coverage_end']}")
    print(f"Spatial bounds: {dataset['spatial_bounds']}")
    print("\nAvailable variables:")
    # Three lines per variable: short name with description, units, and the
    # spatial/temporal resolution.
    for var in dataset['variables']:
        print(f" {var['name']:<5} - {var['long_name']}")
        print(f" Units: {var['units']}")
        print(f" Resolution: {var['spatial_resolution']} (spatial), {var['temporal_resolution']} (temporal)")
    # NOTE(review): the original indentation was lost; the separator is
    # assumed to be printed once per dataset - confirm.
    print("-" * 70)
Example output:
Found 1 weather dataset(s)
Dataset: era5_sfc
Time coverage: 2000-01-01T00:00:00 to 2025-11-01T06:00:00
Spatial bounds: {'min_lat': 5.0, 'max_lat': 45.0, 'min_lon': 40.0, 'max_lon': 110.0}
Available variables:
d2m - 2 metre dewpoint temperature
Units: K
Resolution: 0.2500 degrees (spatial), hourly (temporal)
t2m - 2 metre temperature
Units: K
Resolution: 0.2500 degrees (spatial), hourly (temporal)
tp - Total precipitation
Units: m
Resolution: 0.2500 degrees (spatial), hourly (temporal)
Example 1: Download Weather Data for a State¶
Download temperature and precipitation data for Karnataka using the region ID:
# Download weather data for Karnataka (state_29)
ds = client.download_weather_data(
    dataset_name="era5_sfc",
    variables=["t2m", "tp"],  # Temperature and precipitation
    start_date="2024-01-01",
    end_date="2024-01-31",
    geojson="state_29"  # Karnataka region ID - shapefile fetched automatically
)

# `sizes` is the dimension-name -> length mapping. The mapping behaviour of
# `Dataset.dims` is deprecated in recent xarray releases.
print(f"Dataset dimensions: {dict(ds.sizes)}")
print(f"Dataset variables: {list(ds.data_vars)}")

# Access temperature data
temperature = ds['t2m']
print(f"\nTemperature statistics:")
print(f" Shape: {temperature.shape}")
# Reduce once per statistic and reuse the value for both the Kelvin and the
# Celsius figure.
for label, statistic in (
    ("Mean", temperature.mean()),
    ("Max", temperature.max()),
    ("Min", temperature.min()),
):
    kelvin = statistic.values
    print(f" {label}: {kelvin:.2f} K ({kelvin - 273.15:.2f} °C)")

# Access precipitation data (convert from meters to mm)
precipitation = ds['tp'] * 1000
print(f"\nPrecipitation statistics:")
# .sum() with no `dim` adds up every timestep AND every grid cell, so this
# total grows with the size of the region. Average over latitude/longitude
# first if an area-mean rainfall depth is wanted.
print(f" Total: {precipitation.sum().values:.2f} mm")
print(f" Mean per timestep: {precipitation.mean().values:.4f} mm")
Example output:
Weather data saved to: ./data/weather/era5_sfc/era5_sfc_t2m_tp_20240101_20240131_state_29.nc
Dataset dimensions: {'valid_time': 744, 'latitude': 65, 'longitude': 89}
Dataset variables: ['t2m', 'tp']
Temperature statistics:
Shape: (744, 65, 89)
Mean: 297.45 K (24.30 °C)
Max: 308.92 K (35.77 °C)
Min: 283.15 K (10.00 °C)
Precipitation statistics:
Total: 12456.78 mm
Mean per timestep: 0.2892 mm
Example 2: Download for a Custom Bounding Box¶
Create a custom bounding box for a specific area of interest:
# Edges of the Western Ghats bounding box, in degrees
west, east = 73.0, 77.0
south, north = 8.0, 16.0

# GeoJSON Feature holding one rectangular polygon ring. Each vertex is a
# [longitude, latitude] pair and the first vertex is repeated to close it.
western_ghats_bbox = {
    "type": "Feature",
    "properties": {"region_id": "western_ghats"},
    "geometry": {
        "type": "Polygon",
        "coordinates": [[
            [west, south],  # Southwest corner
            [east, south],  # Southeast corner
            [east, north],  # Northeast corner
            [west, north],  # Northwest corner
            [west, south],  # Close the polygon
        ]],
    },
}

# Request June 2024 for the custom region and store it in a dedicated folder
request = dict(
    dataset_name="era5_sfc",
    variables=["t2m", "d2m", "tp"],
    start_date="2024-06-01",
    end_date="2024-06-30",
    geojson=western_ghats_bbox,
    output_dir="./weather_data/monsoon_analysis",
)
ds = client.download_weather_data(**request)

# Summarise the extent of what came back
lat_low, lat_high = float(ds.latitude.min()), float(ds.latitude.max())
lon_low, lon_high = float(ds.longitude.min()), float(ds.longitude.max())
print(f"Downloaded data covering:")
print(f" Latitude range: {lat_low}° to {lat_high}°")
print(f" Longitude range: {lon_low}° to {lon_high}°")
print(f" Time points: {len(ds.valid_time)}")
print(f" Variables: {list(ds.data_vars)}")
Example 3: Using an Existing GeoJSON File¶
If you have a GeoJSON file for your region:
# Location of the GeoJSON file describing the region of interest
region_file = "path/to/my_region.geojson"

# The file path is passed through the same `geojson` argument
ds = client.download_weather_data(
    dataset_name="era5_sfc",
    variables=["t2m", "d2m"],
    start_date="2024-01-01",
    end_date="2024-01-07",
    geojson=region_file,
)

timestep_count = len(ds.valid_time)
print(f"Weather data downloaded successfully!")
print(f"File saved with {timestep_count} timesteps")
Example 4: Time Series Analysis¶
Download and analyze temperature trends over time:
import matplotlib.pyplot as plt

# Download one full year of 2 m temperature for Karnataka
ds = client.download_weather_data(
    dataset_name="era5_sfc",
    variables=["t2m"],
    start_date="2024-01-01",
    end_date="2024-12-31",
    geojson="state_29"  # Karnataka
)

# Convert temperature to Celsius
ds['t2m_celsius'] = ds['t2m'] - 273.15

# Average in time first (one value per day and grid cell), then over space
# (one value per day for the whole region)
daily_temp = ds['t2m_celsius'].resample(valid_time='1D').mean()
spatial_mean = daily_temp.mean(dim=['latitude', 'longitude'])

# Plot time series
plt.figure(figsize=(14, 6))
spatial_mean.plot()
plt.title('Daily Mean Temperature - Karnataka (2024)')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('karnataka_temperature_2024.png', dpi=300)
print("Temperature plot saved to karnataka_temperature_2024.png")

# Calculate monthly statistics. The month-end ('1ME') resampler is built once
# and reused for all three reductions.
monthly = spatial_mean.resample(valid_time='1ME')
monthly_stats = {
    'mean': monthly.mean().values,
    'max': monthly.max().values,
    'min': monthly.min().values,
}

print("\nMonthly Temperature Statistics (°C):")
print(f"{'Month':<10} {'Mean':>8} {'Max':>8} {'Min':>8}")
print("-" * 40)
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# zip() stops at the shortest input, so a download that returns fewer than
# twelve months prints the months it has instead of raising IndexError.
for month, mean, high, low in zip(
    month_labels,
    monthly_stats['mean'],
    monthly_stats['max'],
    monthly_stats['min'],
):
    print(f"{month:<10} {mean:>8.2f} {high:>8.2f} {low:>8.2f}")
Example 5: Precipitation Analysis¶
Analyze rainfall patterns:
# Fetch total precipitation for June through September 2024
ds = client.download_weather_data(
    dataset_name="era5_sfc",
    variables=["tp"],
    start_date="2024-06-01",
    end_date="2024-09-30",
    geojson="state_29",  # Karnataka
)

# Metres -> millimetres
ds['tp_mm'] = ds['tp'] * 1000

# Sum within each day per grid cell, then average the daily totals over the
# whole region to get one rainfall value per day
daily_precip = ds['tp_mm'].resample(valid_time='1D').sum()
spatial_mean_precip = daily_precip.mean(dim=['latitude', 'longitude'])

# Dates of the extreme days, keyed by the label used in the report
extreme_days = {
    "Wettest": spatial_mean_precip.idxmax().values,
    "Driest": spatial_mean_precip.idxmin().values,
}

season_total = spatial_mean_precip.sum().values
daily_average = spatial_mean_precip.mean().values

print(f"Monsoon Season Precipitation Analysis (June-September 2024)")
print(f"=" * 60)
print(f"Total rainfall: {season_total:.2f} mm")
print(f"Average daily rainfall: {daily_average:.2f} mm")
for label, day in extreme_days.items():
    # Look the rainfall amount up again by date; the first ten characters of
    # the timestamp are the YYYY-MM-DD part
    amount = float(spatial_mean_precip.sel(valid_time=day))
    print(f"{label} day: {str(day)[:10]} - {amount:.2f} mm")

# A day counts as rainy when the regional mean exceeds 2.5 mm
rainy_days = (spatial_mean_precip > 2.5).sum().values
print(f"Number of rainy days (>2.5mm): {rainy_days}")
Example 6: Extracting Data for Specific Locations¶
Extract weather data for specific cities:
import pandas as pd

# Download data for Karnataka region
ds = client.download_weather_data(
    dataset_name="era5_sfc",
    variables=["t2m", "d2m", "tp"],
    start_date="2024-01-01",
    end_date="2024-01-31",
    geojson="state_29"
)

# City coordinates as (latitude, longitude)
cities = {
    'Bangalore': (12.97, 77.59),
    'Mysore': (12.29, 76.64),
    'Mangalore': (12.91, 74.85),
    'Hubli': (15.36, 75.13)
}

# Extract, summarise and save the data for each city
city_data = {}
for city_name, (lat, lon) in cities.items():
    # method='nearest' picks the grid cell closest to the requested point,
    # so the coordinates do not have to match a grid value exactly
    city_ds = ds.sel(
        latitude=lat,
        longitude=lon,
        method='nearest'
    )

    # Flatten the single-cell time series into a dataframe and add columns
    # in everyday units (K -> °C, m -> mm)
    df = city_ds.to_dataframe().reset_index()
    df['t2m_celsius'] = df['t2m'] - 273.15
    df['d2m_celsius'] = df['d2m'] - 273.15
    df['tp_mm'] = df['tp'] * 1000
    city_data[city_name] = df

    # Summary statistics
    print(f"\n{city_name}:")
    print(f" Avg Temperature: {df['t2m_celsius'].mean():.2f} °C")
    print(f" Avg Dewpoint: {df['d2m_celsius'].mean():.2f} °C")
    print(f" Total Precipitation: {df['tp_mm'].sum():.2f} mm")

    # Save the converted columns to one CSV per city
    output_file = f"{city_name.lower()}_weather_jan2024.csv"
    df[['valid_time', 't2m_celsius', 'd2m_celsius', 'tp_mm']].to_csv(
        output_file,
        index=False
    )
    print(f" Data saved to: {output_file}")
Example 7: Spatial Visualization¶
Create spatial maps of weather data:
import matplotlib.pyplot as plt

# A single day keeps the download small enough for a quick map
ds = client.download_weather_data(
    dataset_name="era5_sfc",
    variables=["t2m", "tp"],
    start_date="2024-01-15",
    end_date="2024-01-15",
    geojson="state_29",
)

# Kelvin -> Celsius and metres -> millimetres
ds['t2m_celsius'] = ds['t2m'] - 273.15
ds['tp_mm'] = ds['tp'] * 1000

# Collapse the time axis: mean temperature and accumulated precipitation
mean_temperature = ds['t2m_celsius'].mean(dim='valid_time')
accumulated_precip = ds['tp_mm'].sum(dim='valid_time')

# Two maps side by side
fig, (left_axis, right_axis) = plt.subplots(1, 2, figsize=(16, 6))

# One entry per panel: axis, field to draw, colour map, title
panels = (
    (left_axis, mean_temperature, 'RdYlBu_r',
     'Average Temperature - Karnataka (2024-01-15)'),
    (right_axis, accumulated_precip, 'Blues',
     'Total Precipitation - Karnataka (2024-01-15)'),
)
for axis, field, colormap, heading in panels:
    field.plot(ax=axis, cmap=colormap)
    axis.set_title(heading)
    axis.set_xlabel('Longitude')
    axis.set_ylabel('Latitude')

plt.tight_layout()
plt.savefig('karnataka_weather_map_20240115.png', dpi=300)
print("Weather map saved to karnataka_weather_map_20240115.png")
Example 8: Batch Download for Multiple Regions¶
Download weather data for multiple states:
# States to download, keyed by display name with the region ID as value
states = {
    'Karnataka': 'state_29',
    'Tamil Nadu': 'state_33',
    'Kerala': 'state_32',
    'Andhra Pradesh': 'state_28'
}

# Download data for each state. A failure for one state is reported and the
# loop carries on with the next one.
for state_name, region_id in states.items():
    print(f"\nDownloading data for {state_name}...")
    try:
        ds = client.download_weather_data(
            dataset_name="era5_sfc",
            variables=["t2m", "tp"],
            start_date="2024-01-01",
            end_date="2024-01-31",
            geojson=region_id,
            # One folder per state, e.g. ./weather_data/tamil_nadu
            output_dir=f"./weather_data/{state_name.lower().replace(' ', '_')}"
        )
        # Quick summary
        temp_mean = (ds['t2m'].mean().values - 273.15)
        # Average over the grid cells first, then add up the timesteps.
        # Summing the depth over every grid cell as well would give a number
        # that scales with the size of the state rather than a rainfall depth.
        precip_total = (
            ds['tp'].mean(dim=['latitude', 'longitude']).sum().values * 1000
        )
    except Exception as e:
        # Deliberately broad: this is the per-state boundary of a batch job
        print(f" ✗ Failed: {e}")
    else:
        print(f" ✓ Downloaded successfully")
        print(f" Average temperature: {temp_mean:.2f} °C")
        print(f" Total precipitation: {precip_total:.2f} mm")
Working with Large Datasets¶
For large downloads, consider processing data in chunks:
import pandas as pd

# Download a year of data one calendar month at a time
months = pd.date_range('2024-01-01', '2024-12-31', freq='MS')  # month starts
monthly_results = []
for start_date in months:
    # Last day of the month: first day of the next month minus one day
    end_date = start_date + pd.DateOffset(months=1) - pd.DateOffset(days=1)
    print(f"Downloading {start_date.strftime('%B %Y')}...")
    ds = client.download_weather_data(
        dataset_name="era5_sfc",
        variables=["t2m"],
        start_date=start_date.strftime('%Y-%m-%d'),
        end_date=end_date.strftime('%Y-%m-%d'),
        geojson="state_29"
    )

    # Mean over every timestep and grid cell, converted from K to °C.
    # float() keeps the summary column as plain floats.
    monthly_avg = float(ds['t2m'].mean().values - 273.15)
    monthly_results.append({
        'month': start_date.strftime('%B'),
        'avg_temp': monthly_avg
    })
    print(f" Average temperature: {monthly_avg:.2f} °C")

# Create summary dataframe (one row per month)
summary_df = pd.DataFrame(monthly_results)
print("\nAnnual Temperature Summary:")
print(summary_df)
Tips¶
- Start with small time ranges (1-7 days) when exploring new datasets.
- Use region IDs for convenience: geojson="state_29" instead of loading shapefiles.
- Check data size: hourly data for a month at 0.25° resolution can be several hundred MB.
- Convert units: temperature K to °C: temp_c = temp_k - 273.15; precipitation m to mm: precip_mm = precip_m * 1000.
- Use resample() for temporal aggregation: daily = ds.resample(valid_time='1D').mean()
- Extract point data with sel(): point = ds.sel(latitude=12.97, longitude=77.59, method='nearest')