Pull electricity grid retail data using EIA API

Sep 27, 2020 python

Compiling electricity grid data on retail price, sales, revenue, and customers in select states.

Website: https://www.eia.gov/opendata/

import pandas as pd
import numpy as np
import requests
import os
from pathlib import Path
base_path = Path.cwd()

API Info

api_key = 'put_your_api_key_here'
states = ['CA','TX','NY','IL']
timeframes = ['A','M'] #annual (A) and monthly (M)
series_ids = {'ELEC.REV':'revenue from retail sales of electricity',
              'ELEC.CUSTOMERS':'number of customer accounts',
              'ELEC.PRICE':'average retail price of electricity',
              'ELEC.SALES':'retail sales of electricity'
             }

Pull data

# empty dataframe to fill
all_data = pd.DataFrame({'date': [],
                       'value': [],
                       'series': [],
                       'state': [],
                       'timeframe': []
                      })

# pull data for every series, state, and timeframe then concatenate data
for series_id in series_ids:
    for state in states:
        for timeframe in timeframes:
            
            # set url
            url = 'http://api.eia.gov/series/?api_key='+api_key+'&series_id='+series_id+'.'+state+'-RES.'+timeframe+'&out=json'
            
            # pull url
            r = requests.get(url)
            json_data = r.json()
            
            # clean data
            data = pd.DataFrame(json_data.get('series')[0]['data'])
            data.rename(columns={0:'date',1:'value'}, inplace=True)
            data['series'] = series_id
            data['state'] = state
            data['timeframe'] = timeframe
            
            # concatenate
            all_data = all_data.append(data, ignore_index=True)

Basic data cleaning

# annual data: unpivot, set column names, and reset index
annual_data = all_data.loc[all_data['timeframe'] == 'A']
annual_data = annual_data.pivot_table(index=['date','state'],columns=['series'], values=['value'])
annual_data.columns = annual_data.columns.get_level_values(1)
annual_data.reset_index(inplace=True)

# monthly data: unpivot, set column names, reset index, and convert date to date type
monthly_data = all_data.loc[all_data['timeframe'] == 'M']
monthly_data = monthly_data.pivot_table(index=['date','state'],columns=['series'], values=['value'])
monthly_data.columns = monthly_data.columns.get_level_values(1)
monthly_data.reset_index(inplace=True)
monthly_data['date'] = pd.to_datetime(monthly_data['date'], format='%Y%m')

Output as csv files

annual_data.to_csv('electricity_grid_annual.csv', index=False)
monthly_data.to_csv('electricity_grid_monthly.csv', index=False)