#install.packages("lubridate")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## 1.1 Read Data
data2012 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2012.csv')
data2013 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2013.csv')
data2014 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2014.csv')
data2015 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2015.csv')
data2016 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2016.csv')
## 1.2 Combain 2012 - 2016 data sets
# Rename columns in data2015
names(data2015)[names(data2015) == "Missing.Diagnosis"] <- "Pt...Diagnosis"
names(data2015)[names(data2015) == "Calc...Transport.Mins"] <- "Calc...Transfer.Mins"
# Rename columns in data2016 if necessary
names(data2016)[names(data2016) == "Calc...Transport.Mins"] <- "Calc...Transfer.Mins"
# Now, try to bind the data frames again
data <- rbind(data2012, data2013, data2014, data2015, data2016)
## 1.3 Select the columns that we are interesting and rename
select_data <- data.frame(
Date = data$Date,
Weight = data$Pt...Weight,
MOI = data$X6.Categories,
#Twelve_cats = data$X12.Categories,
Arrival = data$Time.Unit.Arrival,
Depart = data$Time.Unit.Depart,
Time_period = data$Calc...Scene.Mins,
Response_priority = data$Resp..Priority,
Transportation_priority = data$Trans..Priority
)
## 1.4 Remove NA Missing Values
cleaned_data <- na.omit(select_data)
colSums(is.na(cleaned_data))
## Date Weight MOI
## 0 0 0
## Arrival Depart Time_period
## 0 0 0
## Response_priority Transportation_priority
## 0 0
## 1.5 Add new Year, Month and Date columns
library(lubridate)
cleaned_data$Year <- year(mdy(cleaned_data$Date))
cleaned_data$Month <- as.numeric(format(as.Date(cleaned_data$Date, format="%m/%d/%Y"), "%m"))
cleaned_data$Day <- as.numeric(format(as.Date(cleaned_data$Date, format="%m/%d/%Y"), "%d"))
## 1.6 Clean Six
cleaned_data$MOI <- tolower(cleaned_data$MOI)
#cleaned_data$Twelve_cats[cleaned_data$Twelve_cats == "Fall - Other"] <- "Other"
## 1.7 Add new columns tenmins 1 means under ten mins, and 0 means over ten mins Y
cleaned_data$Under_10_mins <- ifelse(cleaned_data$Time_period <= 10, 1, 0)
## 1.8 Convert arrive and depart time to mins
convert_time <- function(time_str) {
time <- strptime(time_str, format="%I:%M:%S %p")
return(as.numeric(format(time, "%H"))*60 + as.numeric(format(time, "%M")) + as.numeric(format(time, "%S"))/60)
}
cleaned_data$Arrival_min <- sapply(cleaned_data$Arrival, convert_time)
cleaned_data$Depart_min <- sapply(cleaned_data$Depart, convert_time)
## 1.9 create new column of 10-mins policy issued
cleaned_data$Date <- as.Date(cleaned_data$Date, format="%m/%d/%Y")
cleaned_data$Intervention <- ifelse(cleaned_data$Date < as.Date("2014-12-18"), "A_pre", "B_post")
# 1.10 New columns of division of the day
# Function to categorize time into part of the day
get_time_of_day <- function(minutes) {
hour <- minutes / 60 # Convert minutes to hours
if (hour >= 5 && hour < 12) {
return('Morning')
} else if (hour >= 12 && hour < 17) {
return('Afternoon')
} else if (hour >= 17 && hour < 21) {
return('Evening')
} else {
return('Night')
}
}
# Apply the function to Arrival_min and Depart_min
cleaned_data$Arrival_Time_of_Day <- sapply(cleaned_data$Arrival_min, get_time_of_day)
cleaned_data$Depart_Time_of_Day <- sapply(cleaned_data$Depart_min, get_time_of_day)
table(cleaned_data$Arrival_Time_of_Day, cleaned_data$Depart_Time_of_Day)
##
## Afternoon Evening Morning Night
## Afternoon 1089 51 0 0
## Evening 0 1191 0 54
## Morning 32 0 881 0
## Night 0 0 21 1638
# 1.11 Drop the errors of time_period
# Replace values in Time_period with NA when they are equal to 1395.00
cleaned_data$Time_period[cleaned_data$Time_period == 1395.00] <- NA
cleaned_data <- na.omit(cleaned_data)
# Check if there are any negative values in Time_period
has_negatives <- any(cleaned_data$Time_period < 0)
# Print the result
print(has_negatives)
## [1] FALSE
summary(cleaned_data$Time_period)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 7.88 11.00 12.46 15.00 199.00
## 1.12 category to factor
cleaned_data$MOI <- as.factor(cleaned_data$MOI)
#cleaned_data$Twelve_cats <- as.factor(cleaned_data$Twelve_cats)
cleaned_data$Intervention <- as.factor(cleaned_data$Intervention)
cleaned_data$Response_priority <- as.factor(cleaned_data$Response_priority)
cleaned_data$Transportation_priority <- as.factor(cleaned_data$Transportation_priority)
cleaned_data$Under_10_mins <- factor(cleaned_data$Under_10_mins, levels = c(0, 1))
cleaned_data$Arrival_Time_of_Day <- as.factor(cleaned_data$Arrival_Time_of_Day)
cleaned_data$Depart_Time_of_Day <- as.factor(cleaned_data$Depart_Time_of_Day)
colSums(is.na(cleaned_data))
## Date Weight MOI
## 0 0 0
## Arrival Depart Time_period
## 0 0 0
## Response_priority Transportation_priority Year
## 0 0 0
## Month Day Under_10_mins
## 0 0 0
## Arrival_min Depart_min Intervention
## 0 0 0
## Arrival_Time_of_Day Depart_Time_of_Day
## 0 0
## 1.13 Save the final data set
#write.csv(cleaned_data, file = "final_data.csv", row.names = FALSE)
# For numeric
hist(cleaned_data$Weight, main="Distribution of Weight", xlab="Weight (lb)", ylab="Frequency")
# For Category
bp <- function(categorical_data) {
# Check if the input is a factor or character, if not, return an error message
if (!is.factor(categorical_data) && !is.character(categorical_data)) {
stop("The input data must be a categorical (factor or character) variable.")
}
# Calculate counts and percentages
data_summary <- as.data.frame(table(categorical_data)) %>%
rename(Category = categorical_data) %>%
mutate(Percentage = Freq / sum(Freq) * 100)
# Create the plot
ggplot(data_summary, aes(x = Category, y = Freq, fill = Category)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Freq, " (", round(Percentage, 1), "%)")),
position = position_stack(vjust = 0.5), size = 3) +
labs(title = paste("Bar Plot of Year 2012 to 2016"),#,#deparse(substitute(categorical_data))),
x = "Year",
y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Adjust text angle for readability
}
bp2 <- function(categorical_dataY, categorical_data2) {
# Check if the inputs are categorical
if (!is.factor(categorical_dataY) && !is.character(categorical_dataY)) {
stop("categorical_dataY must be a categorical (factor or character) variable.")
}
if (!is.factor(categorical_data2) && !is.character(categorical_data2)) {
stop("categorical_data2 must be a categorical (factor or character) variable.")
}
# Calculate counts and percentages
data_summary <- as.data.frame(table(categorical_dataY, categorical_data2)) %>%
rename(Category = categorical_dataY, Subcategory = categorical_data2) %>%
group_by(Category) %>%
mutate(Percentage = Freq / sum(Freq) * 100)
# Create the plot
ggplot(data_summary, aes(x = Category, y = Freq, fill = Subcategory)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Freq, " (", round(Percentage, 1), "%)")),
position = position_stack(vjust = 0.5), size = 3) +
labs(title = paste("Bar Plot of Intervention vs. Transportation Priority"),#, deparse(substitute(categorical_dataY)), "vs", deparse(substitute(categorical_data2))),
x = "Intervention",
y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
heatmap_for_two_categories <- function(cat_data1, cat_data2) {
# Check if both inputs are categorical (factor or character)
if (!is.factor(cat_data1) && !is.character(cat_data1)) {
stop("cat_data1 should be a factor or a character vector.")
} else {
cat_data1 <- as.factor(cat_data1)
}
if (!is.factor(cat_data2) && !is.character(cat_data2)) {
stop("cat_data2 should be a factor or a character vector.")
} else {
cat_data2 <- as.factor(cat_data2)
}
# Create a data frame of counts and calculate percentages
data_summary <- data.frame(cat_data1, cat_data2) %>%
count(cat_data1, cat_data2) %>%
mutate(percentage = n / sum(n) * 100)
# Create the heatmap
ggplot(data_summary, aes(x = cat_data1, y = cat_data2, fill = n)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "red") + # Adjust to shades of red
labs(title = "Heatmap of Category Combinations",
x = deparse(substitute(cat_data1)),
y = deparse(substitute(cat_data2)),
fill = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7), # Adjust text angle and size
axis.text.y = element_text(size = 7)) +
geom_text(aes(label = paste0(n, "\n(", round(percentage, 1), "%)")), size = 3) # Add count and percentage labels
}
# Usage Example
bp(cleaned_data$Under_10_mins)
bp(cleaned_data$Intervention)
bp(cleaned_data$MOI)
bp(cleaned_data$Response_priority)
bp(cleaned_data$Transportation_priority)
bp(as.factor(cleaned_data$Arrival_Time_of_Day))
bp(as.factor(cleaned_data$Depart_Time_of_Day))
bp2(cleaned_data$Under_10_mins, cleaned_data$MOI)
bp2(cleaned_data$Under_10_mins, cleaned_data$Response_priority)
bp2(cleaned_data$Under_10_mins, cleaned_data$Transportation_priority)
bp2(cleaned_data$Under_10_mins, cleaned_data$Intervention)
bp2(cleaned_data$Under_10_mins, cleaned_data$Arrival_Time_of_Day)
bp2(cleaned_data$Under_10_mins, cleaned_data$Depart_Time_of_Day)
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$MOI) + xlab("Under_10_mins") + ylab("Six Category")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Response_priority) + xlab("Under_10_mins") + ylab("Response priority")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Transportation_priority) + xlab("Under_10_mins") + ylab("Transportation priority")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Intervention) + xlab("Under_10_mins") + ylab("Intervention")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Arrival_Time_of_Day) + xlab("Under_10_mins") + ylab("Arrival Time of Day")
heatmap_for_two_categories(cleaned_data$Intervention, cleaned_data$Transportation_priority) + xlab("Intervention") + ylab("Transportation Priority")
ggplot(cleaned_data, aes(x = Under_10_mins, y = Weight, group = Under_10_mins, color = as.factor(Under_10_mins))) +
geom_boxplot() +
theme_minimal() +
labs(title = "Boxplots of Trauma Scene Times Under_10_mins vs. Patient Weight from 2012 to 2016", x = "Under_10_mins", y = "Weight", color = "Under_10_mins")
summary(cleaned_data$Weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.0 145.0 170.0 169.2 200.0 490.0
hist(cleaned_data$Weight, breaks = 30, main = "Distribution of Weight", xlab = "Weight (lb)", ylab = "Frequency", col = "lightblue", border = "black")
dens <- density(cleaned_data$Weight)
m0.1 <- glm(Under_10_mins ~ Weight + Intervention, data = cleaned_data, family = 'binomial')
m0.2 <- glm(Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day, data = cleaned_data, family = 'binomial')
m0.3 <- glm(Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day + Response_priority + Transportation_priority , data = cleaned_data, family = 'binomial')
summary(m0.1)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention, family = "binomial",
## data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3969 -1.0738 -0.9749 1.2224 1.6529
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1038857 0.0960692 1.081 0.28
## Weight -0.0026122 0.0005324 -4.906 9.29e-07 ***
## InterventionB_post 0.4352198 0.0589796 7.379 1.59e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6757.2 on 4952 degrees of freedom
## AIC: 6763.2
##
## Number of Fisher Scoring iterations: 4
summary(m0.2)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day,
## family = "binomial", data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8464 -0.9894 -0.7474 1.1399 1.9704
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1101578 0.1945595 5.706 1.16e-08 ***
## Weight -0.0038456 0.0005679 -6.772 1.27e-11 ***
## InterventionB_post 0.4474462 0.0619568 7.222 5.13e-13 ***
## MOIcrash -1.0015018 0.1745004 -5.739 9.51e-09 ***
## MOIfall -1.4809161 0.1829273 -8.096 5.70e-16 ***
## MOIgsw 0.2280596 0.1871153 1.219 0.22291
## MOIother -0.9355178 0.1803977 -5.186 2.15e-07 ***
## MOIsw 0.3486210 0.1919816 1.816 0.06938 .
## Arrival_Time_of_DayEvening 0.0437772 0.0867203 0.505 0.61369
## Arrival_Time_of_DayMorning -0.1541896 0.0945137 -1.631 0.10281
## Arrival_Time_of_DayNight -0.2303074 0.0838409 -2.747 0.00602 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6302.3 on 4944 degrees of freedom
## AIC: 6324.3
##
## Number of Fisher Scoring iterations: 4
summary(m0.3)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day +
## Response_priority + Transportation_priority, family = "binomial",
## data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9173 -0.9947 -0.7098 1.1102 2.0855
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2480003 0.2120146 5.886 3.95e-09 ***
## Weight -0.0037984 0.0005698 -6.666 2.64e-11 ***
## InterventionB_post 0.4403035 0.0632615 6.960 3.40e-12 ***
## MOIcrash -1.1567499 0.1781652 -6.493 8.44e-11 ***
## MOIfall -1.3991546 0.1858058 -7.530 5.07e-14 ***
## MOIgsw 0.0523367 0.1908933 0.274 0.78396
## MOIother -0.7940369 0.1848848 -4.295 1.75e-05 ***
## MOIsw 0.2408345 0.1944282 1.239 0.21546
## Arrival_Time_of_DayEvening 0.0153239 0.0873861 0.175 0.86080
## Arrival_Time_of_DayMorning -0.1721206 0.0951113 -1.810 0.07035 .
## Arrival_Time_of_DayNight -0.2697102 0.0846577 -3.186 0.00144 **
## Response_priority2 -0.3815487 0.0925467 -4.123 3.74e-05 ***
## Response_priority3 -0.6301143 0.1318657 -4.778 1.77e-06 ***
## Transportation_priority2 0.1607452 0.0905847 1.775 0.07598 .
## Transportation_priority3 -0.1475596 0.1013079 -1.457 0.14524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6245.1 on 4940 degrees of freedom
## AIC: 6275.1
##
## Number of Fisher Scoring iterations: 4
model1 <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + Intervention + MOI + Arrival_Time_of_Day , data = cleaned_data, family = 'binomial')
summary(model1)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Response_priority + Transportation_priority +
## Intervention + MOI + Arrival_Time_of_Day, family = "binomial",
## data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9173 -0.9947 -0.7098 1.1102 2.0855
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2480003 0.2120146 5.886 3.95e-09 ***
## Weight -0.0037984 0.0005698 -6.666 2.64e-11 ***
## Response_priority2 -0.3815487 0.0925467 -4.123 3.74e-05 ***
## Response_priority3 -0.6301143 0.1318657 -4.778 1.77e-06 ***
## Transportation_priority2 0.1607452 0.0905847 1.775 0.07598 .
## Transportation_priority3 -0.1475596 0.1013079 -1.457 0.14524
## InterventionB_post 0.4403035 0.0632615 6.960 3.40e-12 ***
## MOIcrash -1.1567499 0.1781652 -6.493 8.44e-11 ***
## MOIfall -1.3991546 0.1858058 -7.530 5.07e-14 ***
## MOIgsw 0.0523367 0.1908933 0.274 0.78396
## MOIother -0.7940369 0.1848848 -4.295 1.75e-05 ***
## MOIsw 0.2408345 0.1944282 1.239 0.21546
## Arrival_Time_of_DayEvening 0.0153239 0.0873861 0.175 0.86080
## Arrival_Time_of_DayMorning -0.1721206 0.0951113 -1.810 0.07035 .
## Arrival_Time_of_DayNight -0.2697102 0.0846577 -3.186 0.00144 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6245.1 on 4940 degrees of freedom
## AIC: 6275.1
##
## Number of Fisher Scoring iterations: 4
\[ P_{Under 10 mins} = \dfrac{1}{1+e^{-{S_1}}} \] \[ \begin{align*} S_1 =\beta X= & \ 1.248 - 0.0038 \times \text{Weight} - 0.382 \times \text{RP2} \\ & - 0.630 \times \text{RP3} + 0.161 \times \text{TP2} \\ & - 0.148 \times \text{TP3} + 0.440 \times \text{Intervention post} \\ & - 1.157 \times \text{Crash} - 1.399 \times \text{Fall} + 0.052 \times \text{GSW} \\ & - 0.794 \times \text{Other} + 0.241 \times \text{SW} \\ & + 0.015 \times \text{Evening} - 0.172 \times \text{Morning} \\ & - 0.269 \times \text{Night} \end{align*} \]
model2 <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, data = cleaned_data, family = 'binomial')
summary(model2)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Response_priority + Transportation_priority +
## MOI + Intervention + Arrival_Time_of_Day + Transportation_priority *
## Intervention, family = "binomial", data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9457 -0.9861 -0.7125 1.1008 2.0641
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 1.2833189 0.2160558 5.940
## Weight -0.0038257 0.0005702 -6.709
## Response_priority2 -0.3832384 0.0925574 -4.141
## Response_priority3 -0.6209733 0.1320798 -4.702
## Transportation_priority2 0.0761672 0.1055603 0.722
## Transportation_priority3 -0.1259148 0.1183677 -1.064
## MOIcrash -1.1464167 0.1780727 -6.438
## MOIfall -1.3945810 0.1857158 -7.509
## MOIgsw 0.0600184 0.1908213 0.315
## MOIother -0.7880242 0.1847653 -4.265
## MOIsw 0.2525791 0.1944432 1.299
## InterventionB_post 0.2674668 0.1893956 1.412
## Arrival_Time_of_DayEvening 0.0177034 0.0874384 0.202
## Arrival_Time_of_DayMorning -0.1711538 0.0951700 -1.798
## Arrival_Time_of_DayNight -0.2684162 0.0847338 -3.168
## Transportation_priority2:InterventionB_post 0.2761208 0.2056824 1.342
## Transportation_priority3:InterventionB_post 0.0087868 0.2243956 0.039
## Pr(>|z|)
## (Intercept) 2.85e-09 ***
## Weight 1.96e-11 ***
## Response_priority2 3.46e-05 ***
## Response_priority3 2.58e-06 ***
## Transportation_priority2 0.47057
## Transportation_priority3 0.28744
## MOIcrash 1.21e-10 ***
## MOIfall 5.95e-14 ***
## MOIgsw 0.75312
## MOIother 2.00e-05 ***
## MOIsw 0.19395
## InterventionB_post 0.15789
## Arrival_Time_of_DayEvening 0.83955
## Arrival_Time_of_DayMorning 0.07211 .
## Arrival_Time_of_DayNight 0.00154 **
## Transportation_priority2:InterventionB_post 0.17945
## Transportation_priority3:InterventionB_post 0.96876
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6240.8 on 4938 degrees of freedom
## AIC: 6274.8
##
## Number of Fisher Scoring iterations: 4
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
m2 <- glm(Under_10_mins~Transportation_priority*Intervention, data = cleaned_data, family = 'binomial')
m3 <- glm(Under_10_mins~Transportation_priority*Response_priority, data = cleaned_data, family = 'binomial')
plot(allEffects(m2),ask=F)
\[ P_{Under 10 mins} = \dfrac{1}{1+e^{-{S_2}}} \] $$ \[\begin{align*} \text S_2 =\beta X= & \ 1.283 - 0.0038 \times \text{Weight} - 0.383 \times \text{RP2} \\ & - 0.621 \times \text{RP3} + 0.076 \times \text{TP2} \\ & - 0.126 \times \text{TP3} - 1.146 \times \text{Crash} \\ & - 1.395 \times \text{Fall} + 0.060 \times \text{GSW} - 0.788 \times \text{Other} \\ & + 0.253 \times \text{SW} + 0.268 \times \text{Intervention post} \\ & + 0.018 \times \text{Evening} - 0.171 \times \text{Morning} \\ & - 0.268 \times \text{Night} + 0.276 \times \text{TP2*Intervention post} \\ & + 0.0088 \times \text{TP3*Intervention post} \end{align*}\]
$$
library(broom)
model_summary_df1 <- broom::tidy(model1)
print(model_summary_df1)
## # A tibble: 15 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.25 0.212 5.89 3.95e- 9
## 2 Weight -0.00380 0.000570 -6.67 2.64e-11
## 3 Response_priority2 -0.382 0.0925 -4.12 3.74e- 5
## 4 Response_priority3 -0.630 0.132 -4.78 1.77e- 6
## 5 Transportation_priority2 0.161 0.0906 1.77 7.60e- 2
## 6 Transportation_priority3 -0.148 0.101 -1.46 1.45e- 1
## 7 InterventionB_post 0.440 0.0633 6.96 3.40e-12
## 8 MOIcrash -1.16 0.178 -6.49 8.44e-11
## 9 MOIfall -1.40 0.186 -7.53 5.07e-14
## 10 MOIgsw 0.0523 0.191 0.274 7.84e- 1
## 11 MOIother -0.794 0.185 -4.29 1.75e- 5
## 12 MOIsw 0.241 0.194 1.24 2.15e- 1
## 13 Arrival_Time_of_DayEvening 0.0153 0.0874 0.175 8.61e- 1
## 14 Arrival_Time_of_DayMorning -0.172 0.0951 -1.81 7.03e- 2
## 15 Arrival_Time_of_DayNight -0.270 0.0847 -3.19 1.44e- 3
model_summary_df2 <- broom::tidy(model2)
print(model_summary_df2)
## # A tibble: 17 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.28 0.216 5.94 2.85e- 9
## 2 Weight -0.00383 0.000570 -6.71 1.96e-11
## 3 Response_priority2 -0.383 0.0926 -4.14 3.46e- 5
## 4 Response_priority3 -0.621 0.132 -4.70 2.58e- 6
## 5 Transportation_priority2 0.0762 0.106 0.722 4.71e- 1
## 6 Transportation_priority3 -0.126 0.118 -1.06 2.87e- 1
## 7 MOIcrash -1.15 0.178 -6.44 1.21e-10
## 8 MOIfall -1.39 0.186 -7.51 5.95e-14
## 9 MOIgsw 0.0600 0.191 0.315 7.53e- 1
## 10 MOIother -0.788 0.185 -4.26 2.00e- 5
## 11 MOIsw 0.253 0.194 1.30 1.94e- 1
## 12 InterventionB_post 0.267 0.189 1.41 1.58e- 1
## 13 Arrival_Time_of_DayEvening 0.0177 0.0874 0.202 8.40e- 1
## 14 Arrival_Time_of_DayMorning -0.171 0.0952 -1.80 7.21e- 2
## 15 Arrival_Time_of_DayNight -0.268 0.0847 -3.17 1.54e- 3
## 16 Transportation_priority2:InterventionB… 0.276 0.206 1.34 1.79e- 1
## 17 Transportation_priority3:InterventionB… 0.00879 0.224 0.0392 9.69e- 1
data.frame(round(exp(cbind(Estimate=coef(model1), confint(model1))),3))
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 3.483 2.308 5.302
## Weight 0.996 0.995 0.997
## Response_priority2 0.683 0.569 0.818
## Response_priority3 0.533 0.410 0.688
## Transportation_priority2 1.174 0.984 1.403
## Transportation_priority3 0.863 0.708 1.053
## InterventionB_post 1.553 1.372 1.758
## MOIcrash 0.315 0.221 0.444
## MOIfall 0.247 0.171 0.354
## MOIgsw 1.054 0.722 1.527
## MOIother 0.452 0.313 0.647
## MOIsw 1.272 0.866 1.857
## Arrival_Time_of_DayEvening 1.015 0.856 1.205
## Arrival_Time_of_DayMorning 0.842 0.699 1.014
## Arrival_Time_of_DayNight 0.764 0.647 0.901
data.frame(round(exp(cbind(Estimate=coef(model2), confint(model2))),3))
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 3.609 2.371 5.535
## Weight 0.996 0.995 0.997
## Response_priority2 0.682 0.568 0.817
## Response_priority3 0.537 0.414 0.695
## Transportation_priority2 1.079 0.878 1.328
## Transportation_priority3 0.882 0.699 1.112
## MOIcrash 0.318 0.223 0.449
## MOIfall 0.248 0.171 0.355
## MOIgsw 1.062 0.727 1.539
## MOIother 0.455 0.315 0.651
## MOIsw 1.287 0.876 1.879
## InterventionB_post 1.307 0.901 1.895
## Arrival_Time_of_DayEvening 1.018 0.858 1.208
## Arrival_Time_of_DayMorning 0.843 0.699 1.015
## Arrival_Time_of_DayNight 0.765 0.647 0.903
## Transportation_priority2:InterventionB_post 1.318 0.880 1.973
## Transportation_priority3:InterventionB_post 1.009 0.650 1.566
#For Weight 10X, 20X, 30X
data.frame(round(exp(cbind(Estimate=coef(model1), confint(model1))),3))
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 3.483 2.308 5.302
## Weight 0.996 0.995 0.997
## Response_priority2 0.683 0.569 0.818
## Response_priority3 0.533 0.410 0.688
## Transportation_priority2 1.174 0.984 1.403
## Transportation_priority3 0.863 0.708 1.053
## InterventionB_post 1.553 1.372 1.758
## MOIcrash 0.315 0.221 0.444
## MOIfall 0.247 0.171 0.354
## MOIgsw 1.054 0.722 1.527
## MOIother 0.452 0.313 0.647
## MOIsw 1.272 0.866 1.857
## Arrival_Time_of_DayEvening 1.015 0.856 1.205
## Arrival_Time_of_DayMorning 0.842 0.699 1.014
## Arrival_Time_of_DayNight 0.764 0.647 0.901
data.frame(round(exp(cbind(Estimate=10*coef(model1), 10*confint(model1))),3)) #with 10 time
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 263024.620 4281.092 17555484.810
## Weight 0.963 0.952 0.974
## Response_priority2 0.022 0.004 0.135
## Response_priority3 0.002 0.000 0.024
## Transportation_priority2 4.990 0.847 29.552
## Transportation_priority3 0.229 0.031 1.669
## InterventionB_post 81.698 23.673 282.711
## MOIcrash 0.000 0.000 0.000
## MOIfall 0.000 0.000 0.000
## MOIgsw 1.688 0.038 68.921
## MOIother 0.000 0.000 0.013
## MOIsw 11.116 0.237 488.622
## Arrival_Time_of_DayEvening 1.166 0.210 6.463
## Arrival_Time_of_DayMorning 0.179 0.028 1.152
## Arrival_Time_of_DayNight 0.067 0.013 0.354
data.frame(round(exp(cbind(Estimate=20*coef(model1), 20*confint(model1))),3)) #with 20 time
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 6.918195e+10 18327746.158 3.081950e+14
## Weight 9.270000e-01 0.906 9.480000e-01
## Response_priority2 0.000000e+00 0.000 1.800000e-02
## Response_priority3 0.000000e+00 0.000 1.000000e-03
## Transportation_priority2 2.490100e+01 0.718 8.733090e+02
## Transportation_priority3 5.200000e-02 0.001 2.785000e+00
## InterventionB_post 6.674641e+03 560.416 7.992531e+04
## MOIcrash 0.000000e+00 0.000 0.000000e+00
## MOIfall 0.000000e+00 0.000 0.000000e+00
## MOIgsw 2.848000e+00 0.001 4.750124e+03
## MOIother 0.000000e+00 0.000 0.000000e+00
## MOIsw 1.235550e+02 0.056 2.387518e+05
## Arrival_Time_of_DayEvening 1.359000e+00 0.044 4.177600e+01
## Arrival_Time_of_DayMorning 3.200000e-02 0.001 1.326000e+00
## Arrival_Time_of_DayNight 5.000000e-03 0.000 1.250000e-01
data.frame(round(exp(cbind(Estimate=30*coef(model1), 30*confint(model1))),3)) #with 30 time
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 1.819656e+16 7.846276e+10 5.410513e+21
## Weight 8.920000e-01 8.630000e-01 9.230000e-01
## Response_priority2 0.000000e+00 0.000000e+00 2.000000e-03
## Response_priority3 0.000000e+00 0.000000e+00 0.000000e+00
## Transportation_priority2 1.242570e+02 6.080000e-01 2.580786e+04
## Transportation_priority3 1.200000e-02 0.000000e+00 4.647000e+00
## InterventionB_post 5.453079e+05 1.326680e+04 2.259574e+07
## MOIcrash 0.000000e+00 0.000000e+00 0.000000e+00
## MOIfall 0.000000e+00 0.000000e+00 0.000000e+00
## MOIgsw 4.807000e+00 0.000000e+00 3.273840e+05
## MOIother 0.000000e+00 0.000000e+00 0.000000e+00
## MOIsw 1.373385e+03 1.300000e-02 1.166594e+08
## Arrival_Time_of_DayEvening 1.584000e+00 9.000000e-03 2.700200e+02
## Arrival_Time_of_DayMorning 6.000000e-03 0.000000e+00 1.527000e+00
## Arrival_Time_of_DayNight 0.000000e+00 0.000000e+00 4.400000e-02
# Install ggplot2 if you haven't already
#install.packages("ggplot2")
library(ggplot2)
# Create your data frame
data <- data.frame(
Weight = c(1, 10, 20, 30),
Odds = c(0.996, 0.963, 0.927, 0.892),
LowerCI = c(0.995, 0.952, 0.906, 0.863),
UpperCI = c(0.997, 0.974, 0.948, 0.923)
)
# Create the plot
ggplot(data, aes(x = Weight, y = Odds)) +
geom_point() + # Add points for the odds
geom_errorbar(aes(ymin = LowerCI, ymax = UpperCI), width = 0.2) + # Add error bars for the CI
labs(title = "Odds with 95% Confidence Intervals", x = "Weight (lb)", y = "Odds") +
theme_minimal()
library(sjPlot)
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
plot_model(model1)
plot_model(model2)
# Splitting the data into training and testing sets
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(cleaned_data), 0.8*nrow(cleaned_data))
train_data <- cleaned_data[train_indices, ]
test_data <- cleaned_data[-train_indices, ]
# train for model 1
train_model <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day, data = train_data, family = 'binomial')
# Predict on test data
predictions <- predict(train_model, test_data, type = "response")
# Convert predictions to binary outcome
predicted_class <- ifelse(predictions > 0.5, 1, 0)
# Confusion Matrix
confusion_matrix<-table(predicted_class, test_data$Under_10_mins)
print(confusion_matrix)
##
## predicted_class 0 1
## 0 424 217
## 1 122 228
correct_predictions <- sum(diag(confusion_matrix))
total_observations <- nrow(test_data)
accuracy_rate <- correct_predictions / total_observations
print(accuracy_rate)
## [1] 0.6579213
# ROC Curve and AUC
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, predictions)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("ROC Curve - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
# Part 5.4 10 folds cross Validation Model1
# Load necessary library
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
x = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day, train_data)[,-1]
y = train_data$Under_10_mins
# Logistic
cv.glmnet= cv.glmnet(x, y , alpha = 0, family = "binomial", nfolds = 10)
coef(cv.glmnet, cv.glmnet$lambda.min)[,1]
## (Intercept) Weight
## 0.885106060 -0.003992294
## Response_priority2 Response_priority3
## -0.318922900 -0.619882312
## Transportation_priority2 Transportation_priority3
## 0.157884015 -0.130985523
## MOIcrash MOIfall
## -0.781553546 -0.986576140
## MOIgsw MOIother
## 0.402148748 -0.499294435
## MOIsw InterventionB_post
## 0.541453795 0.430246592
## Arrival_Time_of_DayEvening Arrival_Time_of_DayMorning
## 0.064286529 -0.064900697
## Arrival_Time_of_DayNight
## -0.204986284
# Logistic Model
logistic.model = glmnet(x, y, alpha = 0, family = "binomial",
lambda = cv.glmnet$lambda.min)
Coefficient = c("beta.0", "beta.1", "beta.2", "beta.3", "beta.4", "beta.5",
"beta.6", "beta.7", "beta.8", "beta.9", "beta.10", "beta.11", "beta.12", "beta.13", "beta.14")
Estimation = unname(round(-coef(cv.glmnet, cv.glmnet$lambda.min)[,1], 5))
# Model 1 CV
library(dplyr)
x.test = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day, test_data)[,-1]
logistic.prob1 = logistic.model %>% predict(newx = x.test)
predicted.classes.logistic = ifelse(logistic.prob1 > 0.5, "Under 10 Mins", "Over 10 Mins")
# Model accuracy
observed.classes.logistic = test_data$Under_10_mins
mean(predicted.classes.logistic == observed.classes.logistic)
## [1] 0
# Confusion matrix
confusion = table(as.factor(predicted.classes.logistic),
as.factor(test_data$Under_10_mins),
dnn = c("True", "Predicted"))
confusion
## Predicted
## True 0 1
## Over 10 Mins 484 287
## Under 10 Mins 62 158
p.logistic = logistic.model %>% predict(newx = x)
# Accuracy
print(sum(diag(confusion)) / sum(confusion))
## [1] 0.6478305
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, logistic.prob1)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob1): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("Model 1 - ROC Curve with 10-flods Cross Validation - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
# Model2 accuracy
# train for model 2
train_model <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, data = train_data, family = 'binomial')
# Predict on test data
predictions <- predict(train_model, test_data, type = "response")
# Convert predictions to binary outcome
predicted_class <- ifelse(predictions > 0.5, 1, 0)
# Confusion Matrix
confusion_matrix<-table(predicted_class, test_data$Under_10_mins)
print(confusion_matrix)
##
## predicted_class 0 1
## 0 425 207
## 1 121 238
correct_predictions <- sum(diag(confusion_matrix))
total_observations <- nrow(test_data)
accuracy_rate <- correct_predictions / total_observations
print(accuracy_rate)
## [1] 0.6690212
# ROC Curve and AUC
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, predictions)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("ROC Curve - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
library(boot)
library(glmnet)
x = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, train_data)[,-1]
y = train_data$Under_10_mins
# Logistic
cv.glmnet= cv.glmnet(x, y , alpha = 0, family = "binomial", nfolds = 10)
coef(cv.glmnet, cv.glmnet$lambda.min)[,1]
## (Intercept)
## 0.907420145
## Weight
## -0.004017559
## Response_priority2
## -0.316955378
## Response_priority3
## -0.608445964
## Transportation_priority2
## 0.082895427
## Transportation_priority3
## -0.075955950
## MOIcrash
## -0.776001960
## MOIfall
## -0.986994032
## MOIgsw
## 0.407431588
## MOIother
## -0.498903674
## MOIsw
## 0.551874044
## InterventionB_post
## 0.316567843
## Arrival_Time_of_DayEvening
## 0.066076853
## Arrival_Time_of_DayMorning
## -0.063745555
## Arrival_Time_of_DayNight
## -0.206724747
## Transportation_priority2:InterventionB_post
## 0.237366080
## Transportation_priority3:InterventionB_post
## -0.098697821
# Logistic Model
logistic.model = glmnet(x, y, alpha = 0, family = "binomial",
lambda = cv.glmnet$lambda.min)
Coefficient = c("beta.0", "beta.1", "beta.2", "beta.3", "beta.4", "beta.5",
"beta.6", "beta.7", "beta.8", "beta.9", "beta.10", "beta.11", "beta.12", "beta.13", "beta.14")
Estimation = unname(round(-coef(cv.glmnet, cv.glmnet$lambda.min)[,1], 5))
# Model 2 CV
x.test = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, test_data)[,-1]
logistic.prob2 = logistic.model %>% predict(newx = x.test)
predicted.classes.logistic = ifelse(logistic.prob2 > 0.5, "Under 10 Mins", "Over 10 Mins")
# Model accuracy
observed.classes.logistic = test_data$Under_10_mins
mean(predicted.classes.logistic == observed.classes.logistic)
## [1] 0
# Confusion matrix
confusion = table(as.factor(predicted.classes.logistic),
as.factor(test_data$Under_10_mins),
dnn = c("True", "Predicted"))
confusion
## Predicted
## True 0 1
## Over 10 Mins 483 291
## Under 10 Mins 63 154
p.logistic = logistic.model %>% predict(newx = x)
# Accuracy
print(sum(diag(confusion)) / sum(confusion))
## [1] 0.6427851
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, logistic.prob2)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob2): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("Model 2 - ROC Curve with 10-flods Cross Validation - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
# Load necessary library
library(pROC)
# Assuming logistic.prob1 and logistic.prob2 are the predicted probabilities from your two models
roc_obj1 <- roc(test_data$Under_10_mins, logistic.prob1)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob1): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
roc_obj2 <- roc(test_data$Under_10_mins, logistic.prob2)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob2): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
# Calculate AUC for both models
auc_value1 <- auc(roc_obj1)
auc_value2 <- auc(roc_obj2)
# Plot the first ROC curve
plot(roc_obj1, main="ROC Curves Comparison", xlab="False Positive Rate", ylab="True Positive Rate", col="blue")
# Add the second ROC curve
lines(roc_obj2, col="red")
# Add a legend
legend("bottomright", legend=c(paste("Model 1 - AUC:", round(auc_value1, 3)),
paste("Model 2 - AUC:", round(auc_value2, 3))),
col=c("blue", "red"), lwd=2)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:boot':
##
## logit
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
library(alr4)
mmps(model1)
## Warning in mmps(model1): Interactions and/or factors skipped
mmps(model2)
## Warning in mmps(model2): Interactions and/or factors skipped
influencePlot(model1)
## StudRes Hat CookD
## 369 2.0653483 0.002508053 0.0012369820
## 2160 1.2045736 0.012737228 0.0009163225
## 2666 2.0926459 0.003789378 0.0019854891
## 3182 -0.9894323 0.015638885 0.0006707911
## 4804 1.9767581 0.004376927 0.0017554449
influencePlot(model2)