Data Analytics Cyclistic Bike Share Case Study using R and Tableau

Question to answer in the case study: How do annual members and casual riders use Cyclistic bikes differently?

For this data analytics case study, data from the last 3 quarters of 2019 and 1st quarter of 2020 was chosen.

First of all, i did data cleaning and data manipulation using R in RStudio.

Installing Packages

install.packages("tidyverse"
install.packages("dplyr")
install.packages("lubridate"))        

Loading Libraries

library(tidyverse
library(dplyr)
library(lubridate))        

Loading Data Files

q1_2020 <- read.csv("Divvy_Trips_2020_Q1.csv"


q2_2019 <- read.csv("Divvy_Trips_2019_Q2.csv")


q3_2019 <- read.csv("Divvy_Trips_2019_Q3.csv")


q4_2019 <- read.csv("Divvy_Trips_2019_Q4.csv"))        

Renaming Columns to make column names consistent with q1_2020 data

(q4_2019 <- rename(q4_201
? ? ? ? ? ? ? ? ? ?,ride_id = trip_id
? ? ? ? ? ? ? ? ? ?,rideable_type = bikeid?
? ? ? ? ? ? ? ? ? ?,started_at = start_time??
? ? ? ? ? ? ? ? ? ?,ended_at = end_time??
? ? ? ? ? ? ? ? ? ?,start_station_name = from_station_name?
? ? ? ? ? ? ? ? ? ?,start_station_id = from_station_id?
? ? ? ? ? ? ? ? ? ?,end_station_name = to_station_name?
? ? ? ? ? ? ? ? ? ?,end_station_id = to_station_id?
? ? ? ? ? ? ? ? ? ?,member_casual = usertype))
? ? ? ? ? ? ? ? ? ?


(q3_2019 <- rename(q3_2019
? ? ? ? ? ? ? ? ? ?,ride_id = trip_id
? ? ? ? ? ? ? ? ? ?,rideable_type = bikeid?
? ? ? ? ? ? ? ? ? ?,started_at = start_time??
? ? ? ? ? ? ? ? ? ?,ended_at = end_time??
? ? ? ? ? ? ? ? ? ?,start_station_name = from_station_name?
? ? ? ? ? ? ? ? ? ?,start_station_id = from_station_id?
? ? ? ? ? ? ? ? ? ?,end_station_name = to_station_name?
? ? ? ? ? ? ? ? ? ?,end_station_id = to_station_id?
? ? ? ? ? ? ? ? ? ?,member_casual = usertype))
? ? ? ? ? ? ? ? ? ?
?(q2_2019 <- rename(q2_2019
? ? ? ? ? ? ? ? ? ?,ride_id = X01...Rental.Details.Rental.ID
? ? ? ? ? ? ? ? ? ?,rideable_type = X01...Rental.Details.Bike.ID?
? ? ? ? ? ? ? ? ? ?,started_at = X01...Rental.Details.Local.Start.Time??
? ? ? ? ? ? ? ? ? ?,ended_at = X01...Rental.Details.Local.End.Time??
? ? ? ? ? ? ? ? ? ?,start_station_name = X03...Rental.Start.Station.Name?
? ? ? ? ? ? ? ? ? ?,start_station_id = X03...Rental.Start.Station.ID
? ? ? ? ? ? ? ? ? ?,end_station_name = X02...Rental.End.Station.Name?
? ? ? ? ? ? ? ? ? ?,end_station_id = X02...Rental.End.Station.ID
? ? ? ? ? ? ? ? ? ?,member_casual = User.Type))9        

Converting data types of "ride_id and rideable_type" columns in 2019 files to "character" to make their data types consistent with the data types of their corresponding columns in q4_2020


q2_2019 <- mutate(q2_2019, ride_id = as.character(ride_id)
? ? ? ? ? ? ? ? ? rideable_type = as.character(rideable_type))


q3_2019 <- mutate(q3_2019, ride_id = as.character(ride_id),
? ? ? ? ? ? ? ? ? rideable_type = as.character(rideable_type))


q4_2019 <- mutate(q4_2019, ride_id = as.character(ride_id),
? ? ? ? ? ? ? ? ? rideable_type = as.character(rideable_type))        

Binding all 4 files together in 1 file and naming it "all_trips"

all_trips <- bind_rows(q2_2019, q3_2019, q4_2019, q1_2020)        

Dropping un-wanted columns

all_trips <- all_trips %>% select(-c(start_lat, start_lng, end_lat, end_lng, birthyear, gender
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?X01...Rental.Details.Duration.In.Seconds.Uncapped,?
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?X05...Member.Details.Member.Birthday.Year, Member.Gender,?
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tripduration)),        

Reducing types of members from 4 to 2 in the member_casual column

all_trips <-? all_trips %>%
? mutate(member_casual = recode(member_casual,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "Subscriber" = "member",
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "Customer" = "casual"))?        

Adding columns of Date, Month, Day, Year and Day of Week in the combined file

all_trips$Date <- format(as.Date(all_trips$started_at, format = "%m/%d/%Y"), "%d-%b-%Y"
all_trips$Month <- format(as.Date(all_trips$started_at, format = "%m/%d/%Y"), "%b")
all_trips$Day <- format(as.Date(all_trips$started_at, format = "%m/%d/%Y"), "%d")
all_trips$Year <- format(as.Date(all_trips$started_at, format = "%m/%d/%Y"), "%Y")
all_trips$Day_of_Week <- format(as.Date(all_trips$started_at, format = "%m/%d/%Y"), "%A"))        

Calculating Ride Length and adding a column for it as Ride_Length in the combined file

all_trips$Ride_Length <- difftime(strptime(all_trips$ended_at, "%m/%d/%Y %H:%M")
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? strptime(all_trips$started_at, "%m/%d/%Y %H:%M"), units = "secs"),        

Dropping data where Ride Length is less than zero

all_trips_v2 <- all_trips[!(all_trips$start_station_name == "HQ QR" | all_trips$Ride_Length<0),]        

Dropping data with NA values

all_trips_v3 <- all_trips_v2 %>% filter(all_trips_v2$Ride_Length != "NA")        

Giving a proper order to the Day_of_Week column

all_trips_v3$Day_of_Week <- ordered(all_trips_v3$Day_of_Week,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? levels=c("Sunday", "Monday", "Tuesday",?
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"Wednesday", "Thursday", "Friday", "Saturday"))?        

Writing Data in a CSV file to create visualizations in Tableau

write.csv(all_trips_v3, file = "Cleaned_Data.csv")        

Now Data Visualisation in Tableau

As we have cleaned the data so now we are ready to visualize data in Tableau.

Dashboard consists of 6 individual sheets which are as follow

  • Average ride duration (seconds) per day for different types of users.
  • Frequency of rides by day of the week and number of rides each day shared by users.
  • Percentage of rides by user/member type.
  • Frequency of rides by year and respective months of the respective year along with the number of rides by user type each month.
  • Top 5 Busiest Stations For Starting Rides and the proportion of user types on each of these stations.
  • ?5 Least Busy Stations for Starting Rides and the proportion of user types on each of these stations.

Dashboard

Two high-level filtering options are provided in the dashboard which are filter by year and filter by user type.

No alt text provided for this image
Sumaira B.

Project Coordinator | Certified Project Manager CPM | Certified Scrum Master CSM | Business Analyst | Agile | Servant Leader | Requirements Gathering Analyst | Jira | Administrative Manager |

1 å¹´

Can it be done just with Q1

赞
回复
Sumaira B.

Project Coordinator | Certified Project Manager CPM | Certified Scrum Master CSM | Business Analyst | Agile | Servant Leader | Requirements Gathering Analyst | Jira | Administrative Manager |

1 å¹´

great work , I am still stuck in it i wonder A non IT person cant do it easly ,

赞
回复
Zahad Ali Zafar

Senior Executive (Data Science and Data Management) @ Zong.CMPak

2 å¹´

Great work P

赞
回复

要查看或添加评论,请登录

Ahmad Nawaz的更多文章

社区洞察

其他会员也浏览了