Guided Project - Build a Web App using Streamlit to show NYC collision data
Hassan Abbas
Software Design Engineer | LangChain | FastAPI | Flask | Vue | Quarkus | AI/ML
Introduction:
This article is about a guided, hands-on Coursera project for building a data science web app with the Streamlit library in Python. This is the first time I am using Streamlit to create a web app.
Initial Setup
# Standard library
import json

# Third-party
import certifi
import numpy as np
import pandas as pd
import plotly.express as px  # code below calls px.bar, so the alias must be `px` (was `pxt`)
import pydeck as pdk
import streamlit as st       # code below uses `st.` everywhere, so the alias must be `st` (was `s`)
import urllib3
If you are doing this project on your own computer rather than in the cloud workspace provided by Coursera, you will have to install the Streamlit library on your system by running the following in a terminal:
pip install streamlit
Now, write this in terminal to test if the library is installed properly.
streamlit hello
An app will open in your browser as shown in figure below.
I will be using API Endpoint instead of the CSV file.
API Endpoint: https://data.cityofnewyork.us/resource/h9gi-nx95.json
# NYC Open Data "Motor Vehicle Collisions - Crashes" dataset (JSON API endpoint).
# A local CSV export of the same dataset could be substituted here when offline.
data_url = 'https://data.cityofnewyork.us/resource/h9gi-nx95.json'
Function to load the data:
@st.cache(persist=True)  # closing paren was missing; persist=True also caches to disk
def load_data(nrows):
    """Fetch up to `nrows` collision records from the NYC Open Data API.

    Returns a DataFrame with a combined `crash_date_time` datetime column,
    integer casualty counts, and float latitude/longitude columns.
    """
    # Verify the data source's TLS certificate against certifi's CA bundle.
    http = urllib3.PoolManager(
        cert_reqs='CERT_REQUIRED',
        ca_certs=certifi.where())
    req = http.request('GET', data_url)  # fetch the JSON payload from the API
    print('Status of request: ', req.status)  # 200 if OK

    # The endpoint returns a JSON array of objects -> list of dicts -> DataFrame.
    records = json.loads(req.data.decode('utf-8'))
    df = pd.json_normalize(records)

    # Merge the separate date and time fields into a single datetime column.
    df['crash_date_time'] = df['crash_date'].str.slice(0, 10) + " " + df['crash_time']
    df['crash_date_time'] = pd.to_datetime(df['crash_date_time'], format='%Y-%m-%d %H:%M')
    df.drop(['crash_date', 'crash_time'], inplace=True, axis=1)

    # The API delivers every field as a string; cast counts and coordinates.
    # (The original spelled this 8-column list out twice; name it once instead.)
    count_cols = [
        'number_of_persons_injured', 'number_of_persons_killed',
        'number_of_pedestrians_injured', 'number_of_pedestrians_killed',
        'number_of_cyclist_injured', 'number_of_cyclist_killed',
        'number_of_motorist_injured', 'number_of_motorist_killed',
    ]
    df[count_cols] = df[count_cols].astype(int)
    df[['latitude', 'longitude']] = df[['latitude', 'longitude']].astype(float)
    return df[:nrows]
Web App layout and Graphs:
# ---- Page header and 2-D injury map -------------------------------------
data = load_data(10000)
original_data = data  # keep an unfiltered reference for the tables further down

st.title('motor vehicle collisions in nyc'.title())
# The original string had mojibake ("??????") where emojis were lost, and a
# needless f-prefix on a string with no placeholders.
st.markdown('''
This is a dashboard that can be used to analyze
motor vehicle collisions in nyc 🗽💥🚗
''')

st.header("Where are the most people injured in nyc?")  # closing paren was missing
injured_people = st.slider('Number Of People Injured In Vehicle ', 0, 19)
# Plot every collision with at least the selected number of injuries.
st.map(data.query('number_of_persons_injured >= @injured_people')[["latitude", "longitude"]].dropna(how="any"))
# ---- 3-D hexagon map of collisions for a chosen hour --------------------
st.header("how many collisions occur during given time of the day?".title())
hour = st.selectbox("hour to look at", range(0, 24), 1)
data = data[data['crash_date_time'].dt.hour == hour]  # narrow to the chosen hour
st.markdown("vehicle collisions between %i:00 and %i:00" % (hour % 24, (hour + 1) % 24))

# Center the map on the mean coordinate of the filtered collisions.
midpoint = [data['latitude'].mean(skipna=True), data['longitude'].mean(skipna=True)]
st.write(pdk.Deck(
    map_style='mapbox://styles/mapbox/light-v10',
    initial_view_state={
        "latitude": midpoint[0],
        "longitude": midpoint[1],
        "zoom": 11,
        "pitch": 50,
    },
    layers=[
        pdk.Layer(
            "HexagonLayer",
            data=data[['crash_date_time', 'latitude', 'longitude']].dropna(),
            get_position=['longitude', 'latitude'],
            radius=100,
            extruded=True,
            pickable=True,
            elevation_scale=4,  # was `elevation_Scale` — pydeck's kwarg is snake_case
            elevation_range=[0, 1000],
        ),
    ]
))
# ---- Per-minute histogram for the selected hour -------------------------
st.subheader('breakdown by minute between %i:00 and %i:00' % (hour, (hour + 1) % 24))

# Restrict to crashes within [hour, hour + 1). `data` was already narrowed to
# this hour above, so this acts as a defensive re-filter.
in_window = data[
    (data['crash_date_time'].dt.hour >= hour) & (data['crash_date_time'].dt.hour < (hour + 1))
]

# Count crashes in each of the 60 one-minute bins of the hour.
counts, _ = np.histogram(in_window['crash_date_time'].dt.minute, bins=60, range=(0, 60))
chart_data = pd.DataFrame({'minute': range(60), 'crashes': counts})

fig = px.bar(chart_data, x='minute', y='crashes', hover_data=['minute', 'crashes'], height=400)
st.write(fig)
# ---- Top-5 most dangerous streets by affected road-user type ------------
st.header("top 5 dangerous streets in nyc by affected type".title())
select = st.selectbox('affected type of people', ["pedestrians", "cyclists", "motorists"])

# Injury-count column per road-user type. The API's column names use singular
# "cyclist"/"motorist" but plural "pedestrians".
INJURY_COLUMNS = {
    "pedestrians": "number_of_pedestrians_injured",
    "cyclists": "number_of_cyclist_injured",
    "motorists": "number_of_motorist_injured",
}

def _top_streets(df, injured_col, n=5):
    """Return the top `n` (street, injury count) rows with at least one injury.

    Replaces three near-identical copy-pasted branches (which also mixed a
    dangling `if` with an `elif`); output for each selection is unchanged.
    """
    return (df.query("%s >= 1" % injured_col)
              [["on_street_name", injured_col]]
              .sort_values(by=[injured_col], ascending=False)
              .dropna(how='any')[:n])

st.write(_top_streets(original_data, INJURY_COLUMNS[select]))
# ---- Optional raw-data table --------------------------------------------
# (A stray unmatched closing parenthesis followed this block in the original;
# it has been removed.)
if st.checkbox('Show raw data', False):
    st.subheader('raw data')
    st.write(original_data)
References:
LVER at New York State Department of Labor
9 months ago — Excellent job on your Coursera Guided Project!