Pull electricity grid retail data using EIA API
Sep 27, 2020
python
Compiling electricity grid data on retail price, sales, revenue, and customers in select states.
Website: https://www.eia.gov/opendata/
import pandas as pd
import numpy as np
import requests
import os
from pathlib import Path
base_path = Path.cwd()
API Info
api_key = 'put_your_api_key_here'
states = ['CA','TX','NY','IL']
timeframes = ['A','M'] #annual (A) and monthly (M)
series_ids = {'ELEC.REV':'revenue from retail sales of electricity',
'ELEC.CUSTOMERS':'number of customer accounts',
'ELEC.PRICE':'average retail price of electricity',
'ELEC.SALES':'retail sales of electricity'
}
Pull data
# empty dataframe to fill
all_data = pd.DataFrame({'date': [],
'value': [],
'series': [],
'state': [],
'timeframe': []
})
# pull data for every series, state, and timeframe then concatenate data
for series_id in series_ids:
for state in states:
for timeframe in timeframes:
# set url
url = 'http://api.eia.gov/series/?api_key='+api_key+'&series_id='+series_id+'.'+state+'-RES.'+timeframe+'&out=json'
# pull url
r = requests.get(url)
json_data = r.json()
# clean data
data = pd.DataFrame(json_data.get('series')[0]['data'])
data.rename(columns={0:'date',1:'value'}, inplace=True)
data['series'] = series_id
data['state'] = state
data['timeframe'] = timeframe
# concatenate
all_data = all_data.append(data, ignore_index=True)
Basic data cleaning
# annual data: unpivot, set column names, and reset index
annual_data = all_data.loc[all_data['timeframe'] == 'A']
annual_data = annual_data.pivot_table(index=['date','state'],columns=['series'], values=['value'])
annual_data.columns = annual_data.columns.get_level_values(1)
annual_data.reset_index(inplace=True)
# monthly data: unpivot, set column names, reset index, and convert date to date type
monthly_data = all_data.loc[all_data['timeframe'] == 'M']
monthly_data = monthly_data.pivot_table(index=['date','state'],columns=['series'], values=['value'])
monthly_data.columns = monthly_data.columns.get_level_values(1)
monthly_data.reset_index(inplace=True)
monthly_data['date'] = pd.to_datetime(monthly_data['date'], format='%Y%m')
Output as csv files
annual_data.to_csv('electricity_grid_annual.csv', index=False)
monthly_data.to_csv('electricity_grid_monthly.csv', index=False)