Guided Project - Build a Web App using Streamlit to show NYC collision data

Introduction:

This article walks through a guided hands-on project on Coursera for building a data science web app with the Streamlit library in Python. This is the first time I am using Streamlit to create a web app.


Initial Setup

  1. Installing libraries:

import streamlit as st
import urllib3
import certifi
import json
import pandas as pd
import numpy as np
import pydeck as pdk
import plotly.express as px

If you are doing this project on your own computer rather than in the cloud workspace provided by Coursera, you will have to install the Streamlit library on your system by running the following in the terminal:

pip install streamlit        

Now, run this in the terminal to check that the library is installed properly:

streamlit hello        

A demo app will open in your browser, as shown in the figure below.

Streamlit Hello App
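
The other libraries imported above are all available on PyPI under the same names, so if any of them are missing from your environment they can be installed the same way (versions and a virtual environment are up to you; this is just the plain pip invocation):

pip install pandas numpy plotly pydeck urllib3 certifi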


  2. Downloading the dataset from NYC OpenData or using the API:

I will be using the API endpoint instead of the CSV file.

API Endpoint: https://data.cityofnewyork.us/resource/h9gi-nx95.json

# data_url = 'D:\python projects\Collision Data using Python and Streamlit\\Motor_Vehicle_Collisions_-_Crashes.csv'
data_url = 'https://data.cityofnewyork.us/resource/h9gi-nx95.json'        
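
One detail worth knowing about this endpoint: NYC OpenData is served through the Socrata (SODA) API, which returns only 1,000 records per request by default. Since the app later asks load_data for 10,000 rows, you may want to request more records up front with the $limit query parameter. A small sketch, with the limit set to whatever you actually need:

# ask Socrata for up to 10,000 records instead of the default 1,000
data_url = 'https://data.cityofnewyork.us/resource/h9gi-nx95.json?$limit=10000'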

The load_data function:

@st.cache(persist=True)
def load_data(nrows):

    # verify the server's TLS certificate with certifi; a status of 200 means the request succeeded
    http = urllib3.PoolManager(
        cert_reqs='CERT_REQUIRED',
        ca_certs=certifi.where())
    url = data_url
    req = http.request('GET', url)  # get data from the API
    print('Status of request: ', req.status)

    # load the json file (creates a list of dicts)
    dct = json.loads(req.data.decode('utf-8'))

    # creating dataframe from list of dicts
    df = pd.json_normalize(dct)

    # combine crash_date and crash_time into a single datetime column
    df['crash_date_time'] = df['crash_date'].str.slice(0, 10) + " " + df['crash_time']
    df['crash_date_time'] = pd.to_datetime(df['crash_date_time'], format='%Y-%m-%d %H:%M')
    # del df['crash_date']
    # del df['crash_time']
    df.drop(['crash_date', 'crash_time'], inplace=True, axis=1)

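    # the JSON API returns every field as a string, so cast the injury counts to int
    # and the coordinates to float before they are filtered and plotted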
    df[['number_of_persons_injured', 'number_of_persons_killed', 'number_of_pedestrians_injured',
        'number_of_pedestrians_killed', 'number_of_cyclist_injured',
        'number_of_cyclist_killed', 'number_of_motorist_injured',
        'number_of_motorist_killed'
        ]] = df[['number_of_persons_injured',
                 'number_of_persons_killed', 'number_of_pedestrians_injured',
                 'number_of_pedestrians_killed', 'number_of_cyclist_injured',
                 'number_of_cyclist_killed', 'number_of_motorist_injured',
                 'number_of_motorist_killed'
                 ]].astype(int)
    df[['latitude', 'longitude']] = df[['latitude', 'longitude']].astype(float)

    return df[:nrows]
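
A quick note on the caching decorator: in recent Streamlit releases (1.18 and later) st.cache is deprecated in favor of st.cache_data. If you see a deprecation warning, swapping the decorator is usually all that is needed. A minimal sketch, assuming persist="disk" as the equivalent of the old persist=True:

# same load_data as above, only the decorator changes
@st.cache_data(persist="disk")
def load_data(nrows):
    ...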

Web App layout and Graphs:

data = load_data(10000)
original_data = data  # keep an unfiltered copy for the street-level tables and the raw-data view below


st.title('motor vehicle collisions in nyc'.title())
st.markdown('''
This is a dashboard that can be used to analyze
motor vehicle collisions in NYC.
''')


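# Map 1: plain st.map of collision locations, filtered by the slider on the number of people injured.
# The @injured_people inside query() refers to the local Python variable set by the slider.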
st.header("Where are the most people injured in NYC?")
injured_people = st.slider('Number of people injured in vehicle collisions', 0, 19)
st.map(data.query('number_of_persons_injured >= @injured_people')[["latitude", "longitude"]].dropna(how="any"))

st.header("how many collisions occur during given time of the day?".title())
hour = st.selectbox("hour to look at", range(0, 24), 1)
data = data[data['crash_date_time'].dt.hour == hour]

st.markdown("vehicle collisions between %i:00 and %i:00" % (hour % 24, (hour + 1) % 24))
midpoint = [data['latitude'].mean(skipna=True), data['longitude'].mean(skipna=True)]


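# Map 2: pydeck HexagonLayer aggregates the crash points into extruded hexagons,
# giving a 3D view of where collisions cluster during the selected hour.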
st.write(pdk.Deck(
    map_style='mapbox://styles/mapbox/light-v10',
    initial_view_state={
        "latitude": midpoint[0],
        "longitude": midpoint[1],
        "zoom": 11,
        "pitch": 50,
    },
    layers=[
        pdk.Layer(
            "HexagonLayer",
            data=data[['crash_date_time', 'latitude', 'longitude']].dropna(),
            get_position=['longitude', 'latitude'],
            radius=100,
            extruded=True,
            pickable=True,
            elevation_scale=4,
            elevation_range=[0, 1000],
        ),
    ]
))



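# Chart: minute-by-minute histogram of crashes within the selected hour, drawn with Plotly Express.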
st.subheader('breakdown by minute between %i:00 and %i:00' % (hour, (hour + 1) % 24))
filtered = data[
    (data['crash_date_time'].dt.hour >= hour) & (data['crash_date_time'].dt.hour < (hour + 1))
    ]
hist = np.histogram(filtered['crash_date_time'].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({'minute': range(60), 'crashes': hist})
fig = px.bar(chart_data, x='minute', y='crashes', hover_data=['minute', 'crashes'], height=400)
st.write(fig)



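# Tables: top 5 streets by number of injured pedestrians, cyclists or motorists,
# taken from the unfiltered original_data.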
st.header("top 5 dangerous streets in nyc by affected type".title())
select = st.selectbox('affected type of people', ["pedestrians", "cyclists", "motorists"])

if select == "pedestrians":
    st.write(original_data.query("number_of_pedestrians_injured >=1")
             [["on_street_name", "number_of_pedestrians_injured"]].sort_values(
        by=['number_of_pedestrians_injured'], ascending=False).dropna(how='any')[:5])

elif select == "cyclists":
    st.write(original_data.query("number_of_cyclist_injured >=1")
             [["on_street_name", "number_of_cyclist_injured"]].sort_values(
        by=['number_of_cyclist_injured'], ascending=False).dropna(how='any')[:5])

elif select == "motorists":
    st.write(original_data.query("number_of_motorist_injured >=1")
             [["on_street_name", "number_of_motorist_injured"]].sort_values(
        by=['number_of_motorist_injured'], ascending=False).dropna(how='any')[:5])


if st.checkbox('Show raw data', False):
    st.subheader('raw data')
    st.write(original_data)

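With all the pieces in place, save the script and launch it from the terminal (app.py is just an assumed file name here):

streamlit run app.py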

References:

  • https://pandas.pydata.org/
  • https://python.plainenglish.io/from-api-to-pandas-getting-json-data-with-python-df127f699b6b
  • https://streamlit.io/
  • https://deckgl.readthedocs.io/en/latest/deck.html
  • https://www.coursera.org/
