#Library
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(readr)
library(effectsize)
library(pwr)
library(ggplot2)
setwd("/Users/manasa/Documents/Warwick/Term 2/ADA/Group Assignment")
getwd()
## [1] "/Users/manasa/Documents/Warwick/Term 2/ADA/Group Assignment"
# Read the data
LoanData <- read_csv("project_data.csv")
## Rows: 470 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Variant, loanofficer_id
## dbl (20): day, typeI_init, typeI_fin, typeII_init, typeII_fin, agree_init, a...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#print a summary of the structure of LoanData
str(LoanData)
## spc_tbl_ [470 × 22] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Variant : chr [1:470] "Treatment" "Treatment" "Treatment" "Treatment" ...
## $ loanofficer_id : chr [1:470] "qamcqdoe" "qamcqdoe" "qamcqdoe" "qamcqdoe" ...
## $ day : num [1:470] 1 2 3 4 5 6 7 8 9 10 ...
## $ typeI_init : num [1:470] 0 2 3 1 0 0 0 0 0 0 ...
## $ typeI_fin : num [1:470] 0 2 3 2 2 1 1 3 1 0 ...
## $ typeII_init : num [1:470] 2 3 0 1 0 4 1 4 4 2 ...
## $ typeII_fin : num [1:470] 2 3 0 1 0 0 0 1 1 1 ...
## $ agree_init : num [1:470] 7 8 9 8 8 5 8 4 6 9 ...
## $ agree_fin : num [1:470] 10 8 9 9 10 10 10 10 10 10 ...
## $ conflict_init : num [1:470] 2 2 1 2 2 5 2 6 4 1 ...
## $ conflict_fin : num [1:470] 0 2 1 1 0 0 0 0 0 0 ...
## $ revised_per_ai : num [1:470] 2 0 0 1 2 5 2 6 4 1 ...
## $ revised_agst_ai : num [1:470] 0 0 0 0 0 0 0 0 0 0 ...
## $ fully_complt : num [1:470] 9 10 10 10 10 10 10 10 10 10 ...
## $ confidence_init_total: num [1:470] 706 911 710 694 683 743 993 1000 1000 1000 ...
## $ confidence_fin_total : num [1:470] 913 974 970 961 1000 1000 1000 1000 1000 1000 ...
## $ complt_init : num [1:470] 9 10 10 10 10 10 10 10 10 10 ...
## $ complt_fin : num [1:470] 10 10 10 10 10 10 10 10 10 10 ...
## $ ai_typeI : num [1:470] 0 1 2 1 2 1 1 3 1 0 ...
## $ ai_typeII : num [1:470] 2 2 0 1 0 0 0 1 1 1 ...
## $ badloans_num : num [1:470] 4 5 2 3 0 4 1 4 4 3 ...
## $ goodloans_num : num [1:470] 6 5 8 7 10 6 9 6 6 7 ...
## - attr(*, "spec")=
## .. cols(
## .. Variant = col_character(),
## .. loanofficer_id = col_character(),
## .. day = col_double(),
## .. typeI_init = col_double(),
## .. typeI_fin = col_double(),
## .. typeII_init = col_double(),
## .. typeII_fin = col_double(),
## .. agree_init = col_double(),
## .. agree_fin = col_double(),
## .. conflict_init = col_double(),
## .. conflict_fin = col_double(),
## .. revised_per_ai = col_double(),
## .. revised_agst_ai = col_double(),
## .. fully_complt = col_double(),
## .. confidence_init_total = col_double(),
## .. confidence_fin_total = col_double(),
## .. complt_init = col_double(),
## .. complt_fin = col_double(),
## .. ai_typeI = col_double(),
## .. ai_typeII = col_double(),
## .. badloans_num = col_double(),
## .. goodloans_num = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
Result: All columns have the appropriate data type/class.
summary(LoanData)
## Variant loanofficer_id day typeI_init
## Length:470 Length:470 Min. : 1.0 Min. : 0.000
## Class :character Class :character 1st Qu.: 3.0 1st Qu.: 1.000
## Mode :character Mode :character Median : 5.5 Median : 2.000
## Mean : 5.5 Mean : 2.619
## 3rd Qu.: 8.0 3rd Qu.: 4.000
## Max. :10.0 Max. :10.000
## typeI_fin typeII_init typeII_fin agree_init
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.: 2.25
## Median :2.000 Median :1.000 Median :0.0000 Median : 7.00
## Mean :1.904 Mean :1.136 Mean :0.7298 Mean : 5.64
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.: 8.00
## Max. :8.000 Max. :5.000 Max. :3.0000 Max. :10.00
## agree_fin conflict_init conflict_fin revised_per_ai
## Min. : 0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 5.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median : 8.000 Median :2.000 Median :1.000 Median :0.0000
## Mean : 6.602 Mean :1.938 Mean :1.253 Mean :0.8149
## 3rd Qu.: 9.000 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :10.000 Max. :8.000 Max. :8.000 Max. :8.0000
## revised_agst_ai fully_complt confidence_init_total confidence_fin_total
## Min. :0.00000 Min. : 0.000 Min. : 50.0 Min. : 0.0
## 1st Qu.:0.00000 1st Qu.: 6.000 1st Qu.: 485.5 1st Qu.: 297.2
## Median :0.00000 Median :10.000 Median : 654.0 Median : 649.5
## Mean :0.08511 Mean : 7.579 Mean : 624.7 Mean : 561.9
## 3rd Qu.:0.00000 3rd Qu.:10.000 3rd Qu.: 770.5 3rd Qu.: 810.8
## Max. :4.00000 Max. :10.000 Max. :1000.0 Max. :1000.0
## complt_init complt_fin ai_typeI ai_typeII badloans_num
## Min. : 1.00 Min. : 0.000 Min. :0.000 Min. :0.000 Min. :0
## 1st Qu.:10.00 1st Qu.: 9.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:2
## Median :10.00 Median :10.000 Median :2.000 Median :1.000 Median :3
## Mean : 9.47 Mean : 7.855 Mean :1.685 Mean :1.123 Mean :3
## 3rd Qu.:10.00 3rd Qu.:10.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:4
## Max. :10.00 Max. :10.000 Max. :5.000 Max. :3.000 Max. :5
## goodloans_num
## Min. : 5
## 1st Qu.: 6
## Median : 7
## Mean : 7
## 3rd Qu.: 8
## Max. :10
Result : After checking the minimum, maximum, and mean values, no potential outliers were identified. The minimum and maximum values of each column fall within the expected range.
library(ggplot2)
library(dplyr)
library(tidyr)
# List of columns to plot
columns_to_plot <- c("day", "typeI_init", "typeI_fin", "typeII_init", "typeII_fin",
"agree_init", "agree_fin", "conflict_init", "conflict_fin",
"revised_per_ai", "revised_agst_ai", "fully_complt",
"confidence_init_total", "confidence_fin_total",
"complt_init", "complt_fin", "ai_typeI", "ai_typeII",
"badloans_num", "goodloans_num")
# Convert data to long format
LoanData_long <- LoanData %>%
select(all_of(columns_to_plot)) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Value")
# Plot histograms using facet_wrap
ggplot(LoanData_long, aes(x = Value)) +
geom_histogram(binwidth = 1, fill = "skyblue", color = "black", alpha = 0.7) +
facet_wrap(~ Variable, scales = "free") + # Free scales for better visualization
labs(title = "Histograms of Selected Columns", x = "Value", y = "Frequency") +
theme_minimal()
Result : No potential outliers were identified, even though some values
contain a high frequency of 0 and 10. This is expected due to the nature
of the data
na_counts <- colSums(is.na(LoanData))
print(na_counts)
## Variant loanofficer_id day
## 0 0 0
## typeI_init typeI_fin typeII_init
## 0 0 0
## typeII_fin agree_init agree_fin
## 0 0 0
## conflict_init conflict_fin revised_per_ai
## 0 0 0
## revised_agst_ai fully_complt confidence_init_total
## 0 0 0
## confidence_fin_total complt_init complt_fin
## 0 0 0
## ai_typeI ai_typeII badloans_num
## 0 0 0
## goodloans_num
## 0
Result: found 0 N/A
duplicates <- LoanData[duplicated(LoanData), ]
print(duplicates)
## # A tibble: 0 × 22
## # … with 22 variables: Variant <chr>, loanofficer_id <chr>, day <dbl>,
## # typeI_init <dbl>, typeI_fin <dbl>, typeII_init <dbl>, typeII_fin <dbl>,
## # agree_init <dbl>, agree_fin <dbl>, conflict_init <dbl>, conflict_fin <dbl>,
## # revised_per_ai <dbl>, revised_agst_ai <dbl>, fully_complt <dbl>,
## # confidence_init_total <dbl>, confidence_fin_total <dbl>, complt_init <dbl>,
## # complt_fin <dbl>, ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>,
## # goodloans_num <dbl>
Result: No duplicate values
unique_values_Variant <- sort(unique(LoanData$Variant))
print(unique_values_Variant)
## [1] "Control" "Treatment"
Result: There are only 2 variants, which are Control and Treatment, in the data.
# Group by Variant and count rows
grouped_counts <- LoanData %>%
group_by(Variant) %>%
summarise(Count = n())
# Print the result
print(grouped_counts)
## # A tibble: 2 × 2
## Variant Count
## <chr> <int>
## 1 Control 190
## 2 Treatment 280
# Plot the grouped counts with labels
ggplot(grouped_counts, aes(x = Variant, y = Count, fill = Variant)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Count), vjust = -0.5, size = 5) + # Add labels above bars
labs(title = "Number of Control and Treatment Experiments in Total",
x = "Variant",
y = "Count") +
theme_minimal()
Result : There are 190 controls and 280 treatments in total
# Count the number of each variant (control/treatment) per day
variant_per_day <- LoanData %>%
group_by(day, Variant) %>%
summarise(count = n(), .groups = "drop")
# Print the result
print(variant_per_day)
## # A tibble: 20 × 3
## day Variant count
## <dbl> <chr> <int>
## 1 1 Control 19
## 2 1 Treatment 28
## 3 2 Control 19
## 4 2 Treatment 28
## 5 3 Control 19
## 6 3 Treatment 28
## 7 4 Control 19
## 8 4 Treatment 28
## 9 5 Control 19
## 10 5 Treatment 28
## 11 6 Control 19
## 12 6 Treatment 28
## 13 7 Control 19
## 14 7 Treatment 28
## 15 8 Control 19
## 16 8 Treatment 28
## 17 9 Control 19
## 18 9 Treatment 28
## 19 10 Control 19
## 20 10 Treatment 28
# Create a bar plot
ggplot(variant_per_day, aes(x = factor(day), y = count, fill = Variant)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Count of Control and Treatment per Day",
x = "Day",
y = "Count",
fill = "Variant") +
theme_minimal()
Result : On each day from day 1 to day 10, 19 control and 28 treatment
experiments were conducted.
# Check if each loan officer has conducted both control and treatment experiments
loan_officer_experiment <- LoanData %>%
group_by(loanofficer_id) %>%
summarise(experiment_types = n_distinct(Variant)) %>%
mutate(Control_and_Treatment = ifelse(experiment_types > 1, "Both Control and Treatment", "Only One experiment"))
# Print the result
print(loan_officer_experiment)
## # A tibble: 47 × 3
## loanofficer_id experiment_types Control_and_Treatment
## <chr> <int> <chr>
## 1 0899qxvc 1 Only One experiment
## 2 09pij0e2 1 Only One experiment
## 3 0g7pi6g8 1 Only One experiment
## 4 0gh7r2hr 1 Only One experiment
## 5 1ckkyukp 1 Only One experiment
## 6 1ha5khxo 1 Only One experiment
## 7 2twvlktb 1 Only One experiment
## 8 2udootyt 1 Only One experiment
## 9 4cdwcblq 1 Only One experiment
## 10 530lfgx0 1 Only One experiment
## # … with 37 more rows
Result: All loan officers have conducted either control or treatment experiments.
# Count the number of Control and Treatment experiments per loan officer
experiment_count_per_officer <- LoanData %>%
group_by(loanofficer_id, Variant) %>%
summarise(experiment_count = n(), .groups = "drop")
# Print the result
print(experiment_count_per_officer)
## # A tibble: 47 × 3
## loanofficer_id Variant experiment_count
## <chr> <chr> <int>
## 1 0899qxvc Treatment 10
## 2 09pij0e2 Treatment 10
## 3 0g7pi6g8 Control 10
## 4 0gh7r2hr Control 10
## 5 1ckkyukp Treatment 10
## 6 1ha5khxo Treatment 10
## 7 2twvlktb Treatment 10
## 8 2udootyt Control 10
## 9 4cdwcblq Treatment 10
## 10 530lfgx0 Treatment 10
## # … with 37 more rows
Result: All loan officers have conducted either 10 control or 10 treatment experiments.
unique_count_loanofficer_id <- length(unique(LoanData$loanofficer_id))
print(unique_count_loanofficer_id)
## [1] 47
Result : Found 47 unique loanofficer id in total
There are 47 loan officers in total. Each loan officer conducted either Control or Treatment experiments. 19 conducted control experiments, and 28 conducted treatment experiments. Each officer participated in 10 experiments.
LoanData_grouped <- LoanData %>%
mutate(total_loans = badloans_num + goodloans_num) %>%
group_by(total_loans) %>%
summarise(count = n())
# Print the result
print(LoanData_grouped)
## # A tibble: 1 × 2
## total_loans count
## <dbl> <int>
## 1 10 470
Result : All rows have a total of 10 bad and good loans combined.
LoanData %>%
group_by(Variant) %>%
summarise(avg_good = mean(goodloans_num), avg_bad = mean(badloans_num))
## # A tibble: 2 × 3
## Variant avg_good avg_bad
## <chr> <dbl> <dbl>
## 1 Control 7 3
## 2 Treatment 7 3
Result : On average, 7 good loans and 3 bad loans were given to loan officers in both the control and treatment groups during the experiment
filtered_complt_fin10 <- LoanData %>%
filter(complt_fin != 10) %>%
group_by(Variant) %>%
summarise(count = n())
# Print the result
print(filtered_complt_fin10)
## # A tibble: 2 × 2
## Variant count
## <chr> <int>
## 1 Control 97
## 2 Treatment 23
Result : There are 97 control and 23 treatment experiments where fewer than 10 loan decisions were completed per day in the final stage
# Sum of agree_fin and conflict_fin
LoanData <- LoanData %>%
mutate(total_AgreeConflict = agree_fin+conflict_fin)
# Check if any rows have total_loans not equal to 10
rows_AgreeConflict <- LoanData %>%
filter(total_AgreeConflict ==0)
# Print the rows where the sum of "agree_fin", "conflict_fin" is 0
print(rows_AgreeConflict)
## # A tibble: 90 × 23
## Variant loano…¹ day typeI…² typeI…³ typeI…⁴ typeI…⁵ agree…⁶ agree…⁷ confl…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 2udoot… 1 2 0 1 0 0 0 0
## 2 Control 2udoot… 2 4 0 1 0 0 0 0
## 3 Control 2udoot… 3 3 0 2 0 0 0 0
## 4 Control 2udoot… 4 3 0 2 0 0 0 0
## 5 Control 2udoot… 5 3 0 0 0 0 0 0
## 6 Control 2udoot… 6 5 0 0 0 0 0 0
## 7 Control 2udoot… 7 5 0 0 0 0 0 0
## 8 Control 2udoot… 8 4 0 1 0 0 0 0
## 9 Control 2udoot… 9 4 0 0 0 0 0 0
## 10 Control 2udoot… 10 2 0 0 0 0 0 0
## # … with 80 more rows, 13 more variables: conflict_fin <dbl>,
## # revised_per_ai <dbl>, revised_agst_ai <dbl>, fully_complt <dbl>,
## # confidence_init_total <dbl>, confidence_fin_total <dbl>, complt_init <dbl>,
## # complt_fin <dbl>, ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>,
## # goodloans_num <dbl>, total_AgreeConflict <dbl>, and abbreviated variable
## # names ¹loanofficer_id, ²typeI_init, ³typeI_fin, ⁴typeII_init, ⁵typeII_fin,
## # ⁶agree_init, ⁷agree_fin, ⁸conflict_init
Result : 90 rows with the total value of “agree_fin” and “conflict_fin” equal to 0 which means that there are potential that officer conducted the final decision without seeing any computer model decisions.
# Filter rows based on conditions
filtered_rows <- LoanData %>%
filter(complt_init > 0 & complt_fin == 0)
# View the filtered rows
print(filtered_rows)
## # A tibble: 90 × 23
## Variant loano…¹ day typeI…² typeI…³ typeI…⁴ typeI…⁵ agree…⁶ agree…⁷ confl…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 2udoot… 1 2 0 1 0 0 0 0
## 2 Control 2udoot… 2 4 0 1 0 0 0 0
## 3 Control 2udoot… 3 3 0 2 0 0 0 0
## 4 Control 2udoot… 4 3 0 2 0 0 0 0
## 5 Control 2udoot… 5 3 0 0 0 0 0 0
## 6 Control 2udoot… 6 5 0 0 0 0 0 0
## 7 Control 2udoot… 7 5 0 0 0 0 0 0
## 8 Control 2udoot… 8 4 0 1 0 0 0 0
## 9 Control 2udoot… 9 4 0 0 0 0 0 0
## 10 Control 2udoot… 10 2 0 0 0 0 0 0
## # … with 80 more rows, 13 more variables: conflict_fin <dbl>,
## # revised_per_ai <dbl>, revised_agst_ai <dbl>, fully_complt <dbl>,
## # confidence_init_total <dbl>, confidence_fin_total <dbl>, complt_init <dbl>,
## # complt_fin <dbl>, ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>,
## # goodloans_num <dbl>, total_AgreeConflict <dbl>, and abbreviated variable
## # names ¹loanofficer_id, ²typeI_init, ³typeI_fin, ⁴typeII_init, ⁵typeII_fin,
## # ⁶agree_init, ⁷agree_fin, ⁸conflict_init
Result : 90 rows were found which all from Control experiments.
Based on our previous found, we also noted that the confidence score in the final stage from rows where sum of “agree_fin”, “conflict_fin” is 0 OR rows where complt_init > 0 & complt_fin == 0 , all have confidence score as 0
### Confidence score
LoanData %>%
select(complt_fin,confidence_fin_total)
## # A tibble: 470 × 2
## complt_fin confidence_fin_total
## <dbl> <dbl>
## 1 10 913
## 2 10 974
## 3 10 970
## 4 10 961
## 5 10 1000
## 6 10 1000
## 7 10 1000
## 8 10 1000
## 9 10 1000
## 10 10 1000
## # … with 460 more rows
####Sum total value of confidence score in the final stage from all rows where sum of “agree_fin”, “conflict_fin” is 0
total_confidence_0 <- rows_AgreeConflict %>%
summarise(total = sum(confidence_fin_total, na.rm = TRUE))
print(total_confidence_0)
## # A tibble: 1 × 1
## total
## <dbl>
## 1 0
####Sum total value of confidence score in the final stage from all rows where complt_init > 0 & complt_fin == 0
total_confidence_withoutAI <- filtered_rows %>%
summarise(total = sum(confidence_fin_total, na.rm = TRUE))
print(total_confidence_withoutAI)
## # A tibble: 1 × 1
## total
## <dbl>
## 1 0
With the total confidence score in the final stage being 0, we assume that for experiments where the sum of agree_fin and conflict_fin is 0, or where complt_init > 0 and complt_fin == 0, loan officers are likely not considering the computer model in their final decision. This may be due to their low confidence in the model or because they did not make any decision in the final stage, resulting in a confidence score of 0
filtered_agr_conf <- LoanData %>%
filter((agree_init + conflict_init != complt_init) | (agree_fin + conflict_fin != complt_fin))
filtered_agr_conf_AI <- filtered_agr_conf %>%
filter(!(complt_init > 0 & complt_fin == 0))
# View the filtered rows
#print(filtered_agr_conf_AI )
filtered_agr_conf_AI %>%
select(Variant, loanofficer_id, agree_init, conflict_init, complt_init, agree_fin,conflict_fin,complt_fin) %>%
print()
## # A tibble: 6 × 8
## Variant loanofficer_id agree_init conflict…¹ compl…² agree…³ confl…⁴ compl…⁵
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Treatment 92vdohom 9 0 10 7 2 9
## 2 Control qwun9ha5 4 4 10 6 2 8
## 3 Treatment envu2p1p 6 1 8 6 1 7
## 4 Treatment envu2p1p 3 3 10 6 0 6
## 5 Treatment envu2p1p 5 3 10 6 2 8
## 6 Treatment 9lejzokf 1 7 10 6 2 8
## # … with abbreviated variable names ¹conflict_init, ²complt_init, ³agree_fin,
## # ⁴conflict_fin, ⁵complt_fin
Result : Although the numbers from agree_init and conflict_init do not add up to complt_init, the numbers from agree_fin and conflict_fin do add up to complt_fin. Since our main focus is on the final stage, where loan officers see the computer prediction results, and the mismatches in the initial stage are minor, we have decided to keep these rows unchanged. The inconsistencies in the initial stage may be due to staff forgetting to record the agreement or disagreement results. Additionally, given our already small sample size, removing or modifying these six rows could further reduce the data quality. Therefore, we have chosen to retain them as they are.
# Remove rows where complt_init > 0 and complt_fin == 0
df <- LoanData %>%
filter(!(complt_init > 0 & complt_fin == 0))
# View the remaining rows
print(df)
## # A tibble: 380 × 23
## Variant loano…¹ day typeI…² typeI…³ typeI…⁴ typeI…⁵ agree…⁶ agree…⁷ confl…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Treatm… qamcqd… 1 0 0 2 2 7 10 2
## 2 Treatm… qamcqd… 2 2 2 3 3 8 8 2
## 3 Treatm… qamcqd… 3 3 3 0 0 9 9 1
## 4 Treatm… qamcqd… 4 1 2 1 1 8 9 2
## 5 Treatm… qamcqd… 5 0 2 0 0 8 10 2
## 6 Treatm… qamcqd… 6 0 1 4 0 5 10 5
## 7 Treatm… qamcqd… 7 0 1 1 0 8 10 2
## 8 Treatm… qamcqd… 8 0 3 4 1 4 10 6
## 9 Treatm… qamcqd… 9 0 1 4 1 6 10 4
## 10 Treatm… qamcqd… 10 0 0 2 1 9 10 1
## # … with 370 more rows, 13 more variables: conflict_fin <dbl>,
## # revised_per_ai <dbl>, revised_agst_ai <dbl>, fully_complt <dbl>,
## # confidence_init_total <dbl>, confidence_fin_total <dbl>, complt_init <dbl>,
## # complt_fin <dbl>, ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>,
## # goodloans_num <dbl>, total_AgreeConflict <dbl>, and abbreviated variable
## # names ¹loanofficer_id, ²typeI_init, ³typeI_fin, ⁴typeII_init, ⁵typeII_fin,
## # ⁶agree_init, ⁷agree_fin, ⁸conflict_init
control_count <- sum(df$Variant == "Control")
treatment_count <- sum(df$Variant == "Treatment")
print(control_count)
## [1] 100
print(treatment_count)
## [1] 280
## Set categorical variable
df$Variant <- factor(df$Variant)
First, we can evaluate the performance of the new model using the following metrics as OEC (Overall Evaluation Criteria):
Both errors are considered after the officer reviews the computer prediction (as it is in the current process).
df1<-df
# Calculate Type I and Type II error rates aggregating the information by loan officer ID
df1 <- df1 %>% group_by(Variant,loanofficer_id) %>%
summarise(
Type_I_Error_Rate = sum(typeI_fin) / sum(goodloans_num), # False Positives / Total Actual Negatives
Type_II_Error_Rate = sum(typeII_fin) / sum(badloans_num) # False Negatives / Total Actual Positives
)
## `summarise()` has grouped output by 'Variant'. You can override using the
## `.groups` argument.
df1 <- df1 %>%
mutate(
Type_I_Error_Rate = ifelse(is.na(Type_I_Error_Rate),0,Type_I_Error_Rate), # NA with 0
Type_II_Error_Rate = ifelse(is.na(Type_II_Error_Rate),0,Type_II_Error_Rate) # NA with 0
)
Type_I_Error_Rate
t.test(
Type_I_Error_Rate ~ Variant,
data = df1,
var.equal = FALSE) # assuming samples have unequal variances (using Welch t-test)
##
## Welch Two Sample t-test
##
## data: Type_I_Error_Rate by Variant
## t = 4.0648, df = 11.019, p-value = 0.001861
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## 0.1035696 0.3480630
## sample estimates:
## mean in group Control mean in group Treatment
## 0.5028571 0.2770408
The type I error in control is greater than in treatment.
Type_II_Error_Rate
t.test(
Type_II_Error_Rate ~ Variant,
data = df1,
var.equal = FALSE) # assuming samples have unequal variances (using Welch t-test)
##
## Welch Two Sample t-test
##
## data: Type_II_Error_Rate by Variant
## t = 3.4409, df = 10.923, p-value = 0.005571
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## 0.04514467 0.20580771
## sample estimates:
## mean in group Control mean in group Treatment
## 0.3933333 0.2678571
The type II error in control is greater than in treatment.
The analysis of the two metrics (Ratio of Error Type I, and Error Type II) indicates that the new model is performing better than the existing model.
# Compute mean OEC for each Variant
mean_OEC_each_Variant <- df1 %>%
group_by(Variant) %>%
summarise(mean_Type_I_Error_Rate = mean(Type_I_Error_Rate),
mean_Type_II_Error_Rate = mean(Type_II_Error_Rate))
# View mean OEC
print(mean_OEC_each_Variant)
## # A tibble: 2 × 3
## Variant mean_Type_I_Error_Rate mean_Type_II_Error_Rate
## <fct> <dbl> <dbl>
## 1 Control 0.503 0.393
## 2 Treatment 0.277 0.268
# Compute pairwise % differences in OEC between pairs of variants
pairwise_diff <- mean_OEC_each_Variant %>%
summarise(
Dif_Type_I_Error_Rate = mean_Type_I_Error_Rate[Variant == "Treatment"] - mean_Type_I_Error_Rate[Variant == "Control"],
Dif_Type_II_Error_Rate = mean_Type_II_Error_Rate[Variant == "Treatment"] - mean_Type_II_Error_Rate[Variant == "Control"],
Perc_Type_I_Error_Rate = (Dif_Type_I_Error_Rate / mean_Type_I_Error_Rate[Variant == "Control"]) * 100,
Perc_Type_II_Error_Rate = (Dif_Type_II_Error_Rate / mean_Type_II_Error_Rate[Variant == "Control"]) * 100
)
perc_dif <- pairwise_diff %>% select(Perc_Type_I_Error_Rate,Perc_Type_II_Error_Rate)
# View pairwise differences
print(perc_dif)
## # A tibble: 1 × 2
## Perc_Type_I_Error_Rate Perc_Type_II_Error_Rate
## <dbl> <dbl>
## 1 -44.9 -31.9
Treatment (new model) reduce the type I error compared to Control (existing model) by 44.9%.
Treatment (new model) reduce the type II error compared to Control (existing model) by 31.9%.
Type_I_Error_Rate
Control = df1$Type_I_Error_Rate[df1$Variant == "Control"]
Treatment = df1$Type_I_Error_Rate[df1$Variant == "Treatment"]
cohens_d(Treatment, Control) # compute effect size of difference between Treatment & Control
## Cohen's d | 95% CI
## --------------------------
## -1.96 | [-2.80, -1.10]
##
## - Estimated using pooled SD.
effectsize::interpret_cohens_d(-1.96)
## [1] "large"
## (Rules: cohen1988)
Treatment (new model) significantly reduced (p < 5.76e-06, d = -1.96) the error type I ratio compared to Control (existing model) by 44.9%.
Type_II_Error_Rate
Control = df1$Type_II_Error_Rate[df1$Variant == "Control"]
Treatment = df1$Type_II_Error_Rate[df1$Variant == "Treatment"]
cohens_d(Treatment, Control) # compute effect size of difference between Treatment & Control
## Cohen's d | 95% CI
## --------------------------
## -1.67 | [-2.48, -0.84]
##
## - Estimated using pooled SD.
effectsize::interpret_cohens_d(-1.67)
## [1] "large"
## (Rules: cohen1988)
Treatment (new model) significantly reduced (p < 6.12e-05, d = -1.67) the error type II ratio compared to Control (existing model) by 31.9%.
Treatment (new model) significantly reduced (p < 5.76e-06, d = -1.96) the error type I ratio compared to Control (existing model) by 44.9%. The reduction in Type I error with the new model will retain more good loans (not default), increasing the company’s revenue.
Treatment (new model) significantly reduced (p < 6.12e-05, d = -1.67) the error type II ratio compared to Control (existing model) by 31.9%. The reduction in Type II error with the new model will help the company minimize financial losses.
The positive impact of the new model in reducing Error Types I and II has both statistical and practical significance, taking into account the p-value, Cohen’s d measure, and the percentage change.Therefore, the company should adopt the new model.
# Aggregate data by loanofficer_id and Variant
df2 <- df %>%
group_by(loanofficer_id, Variant) %>%
summarise(
agree_fin = sum(agree_fin, na.rm = TRUE),
complt_fin = sum(complt_fin, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(agree_ratio = agree_fin / complt_fin)
# Independent t-test
t_test_agreement_ratio <- t.test(agree_ratio ~ Variant, data = df2, var.equal = FALSE)
print(t_test_agreement_ratio)
##
## Welch Two Sample t-test
##
## data: agree_ratio by Variant
## t = -3.4329, df = 11.383, p-value = 0.005333
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.20213704 -0.04459309
## sample estimates:
## mean in group Control mean in group Treatment
## 0.7491300 0.8724951
A t-test showed a statistically significant difference between the control and treatment groups, t(11.38) = -3.43, p = 0.0053. The 95% confidence interval (-0.2021, -0.0446) confirms the difference between the groups. The mean proportion of decisions agreeing with AI predictions was higher in the treatment group (M = 0.8725) than in the control group (M = 0.7491), suggesting that staff using the new AI model were significantly more likely to align their decisions with AI recommendations compared to those using the old AI model.
# Effect size (Cohen’s d)
Control_Agreement_Ratio <- df2$agree_ratio[df2$Variant == "Control"]
Treatment_Agreement_Ratio <- df2$agree_ratio[df2$Variant == "Treatment"]
cohens_d(Control_Agreement_Ratio , Treatment_Agreement_Ratio)
## Cohen's d | 95% CI
## --------------------------
## -1.60 | [-2.40, -0.78]
##
## - Estimated using pooled SD.
effectsize::interpret_cohens_d(-1.60)
## [1] "large"
## (Rules: cohen1988)
The effect size, as measured by Cohen’s d = -1.60 (95% CI: [-2.40, -0.78]), suggests a large difference between the control and treatment groups. This indicates that the new AI model had a substantial impact on the proportion of decisions agreeing with AI predictions.
Conclusion A Welch two-sample t-test showed a significant difference between the control and treatment groups, t(11.38) = -3.43, p = 0.0053. The mean proportion of decisions agreeing with AI predictions was higher in the treatment group (M = 0.8725) than in the control group (M = 0.7491). Cohen’s d = -1.60 (95% CI: [-2.40, -0.78]) suggests a large effect size, indicating a substantial impact of the new AI model on decision-making.
# Hypothesis: Higher confidence_fin_total in Treatment suggests improved model effectiveness
# Independent t-test
t_test_confidence <- t.test(confidence_fin_total ~ Variant, data = df, var.equal = FALSE)
print(t_test_confidence)
##
## Welch Two Sample t-test
##
## data: confidence_fin_total by Variant
## t = -6.0989, df = 222.19, p-value = 4.687e-09
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -179.29481 -91.72233
## sample estimates:
## mean in group Control mean in group Treatment
## 595.1200 730.6286
The p-value is extremely small (p < 0.001), meaning we reject the null hypothesis. There is a statistically significant difference in final confidence between the Control and Treatment groups. Loan officers in the Treatment group have significantly higher confidence in their final decisions compared to the Control group.
# Effect size (Cohen’s d)
Control_Confidence <- df$confidence_fin_total[df$Variant == "Control"]
Treatment_Confidence <- df$confidence_fin_total[df$Variant == "Treatment"]
cohens_d(Control_Confidence ,Treatment_Confidence)
## Cohen's d | 95% CI
## --------------------------
## -0.63 | [-0.86, -0.40]
##
## - Estimated using pooled SD.
effectsize::interpret_cohens_d(-0.63)
## [1] "medium"
## (Rules: cohen1988)
Cohen’s d = -0.63 95% Confidence Interval: [-0.86, -0.40] Interpretation: Moderate-to-large effect size This suggests a meaningful practical difference—the AI model enhances confidence levels among loan officers.
Conclusion The p-value is extremely small (p < 0.001), meaning we reject the null hypothesis. There is a statistically significant difference in final confidence between the Control and Treatment groups. Loan officers in the Treatment group have significantly higher confidence in their final decisions compared to the Control group.
# Aggregate data at the loan officer level
df4 <- df %>%
group_by(loanofficer_id, Variant) %>%
summarise(revised_ratio = sum(revised_per_ai) / sum(complt_fin), .groups = "drop")
# Perform t-test on the aggregated data
t_test_revised_ratio <- t.test(
revised_ratio ~ Variant,
data = df4,
var.equal = FALSE
)
print(t_test_revised_ratio)
##
## Welch Two Sample t-test
##
## data: revised_ratio by Variant
## t = -2.9706, df = 34.614, p-value = 0.005371
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.10901678 -0.02048277
## sample estimates:
## mean in group Control mean in group Treatment
## 0.05545891 0.12020869
A t-test showed a statistically significant difference between the control and treatment groups, t(34.61) = -2.97, p = 0.0054. The 95% confidence interval (-0.1090, -0.0205) confirms the difference between the groups. The mean revised_ratio was higher in the treatment group (M = 0.1202) than in the control group (M = 0.0555), suggesting that staff using the new AI model were significantly more likely to revise decisions following AI predictions compared to those using the old AI model.
# Compute Cohen’s d for revised_ratio
Control_Revised_Ratio <- df4$revised_ratio[df4$Variant == "Control"]
Treatment_Revised_Ratio <- df4$revised_ratio[df4$Variant == "Treatment"]
cohens_d(Control_Revised_Ratio,Treatment_Revised_Ratio)
## Cohen's d | 95% CI
## --------------------------
## -0.77 | [-1.51, -0.02]
##
## - Estimated using pooled SD.
effectsize::interpret_cohens_d(-0.77)
## [1] "medium"
## (Rules: cohen1988)
The effect size, as measured by Cohen’s d = -0.77 (95% CI: [-1.51, -0.02]), suggests a moderate difference between the control and treatment groups. This indicates that the new AI model had a meaningful impact on the proportion of decisions revised following AI predictions.
Conclusion : A Welch t-test revealed a significant difference between the control and treatment groups, t(34.61) = -2.97, p = 0.00537. The treatment group had a higher mean proportion of decisions revised (M = 0.1202) compared to the control group (M = 0.0555). Cohen’s d = -0.77 (95% CI: [-1.51, -0.02]) suggests a moderate effect of the new AI model.
Type II Error Rate = False Negatives / Total Actual Positives = typeII_fin / badloans_num - Hypothesis: The new model will reduce the Type II Error Rate – single tailed t-test
df5<-LoanData
# Calculate Type I and Type II error rates aggregating the information by loan officer ID
df5 <- df5 %>% group_by(Variant,loanofficer_id) %>%
summarise(
Type_II_Error_Rate = sum(typeII_fin) / sum(badloans_num) # False Negatives / Total Actual Positives
)
## `summarise()` has grouped output by 'Variant'. You can override using the
## `.groups` argument.
df5 <- df5 %>%
mutate(
Type_II_Error_Rate = ifelse(is.na(Type_II_Error_Rate),0,Type_II_Error_Rate) # NA with 0
)
Type_II_Error_Rate
t.test(
Type_II_Error_Rate ~ Variant,
data = df5,
var.equal = FALSE) # assuming samples have unequal variances (using Welch t-test)
##
## Welch Two Sample t-test
##
## data: Type_II_Error_Rate by Variant
## t = -1.1967, df = 19.84, p-value = 0.2455
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.16694635 0.04526716
## sample estimates:
## mean in group Control mean in group Treatment
## 0.2070175 0.2678571
The type II error in control is less than in treatment.
The analysis of the Ratio of Error Type II indicates that the new model is not performing better than the existing model.
Type_II_Error_Rate
Control = df5$Type_II_Error_Rate[df5$Variant == "Control"]
Treatment = df5$Type_II_Error_Rate[df5$Variant == "Treatment"]
cohens_d(Treatment, Control) # compute effect size of difference between Treatment & Control
## Cohen's d | 95% CI
## -------------------------
## 0.42 | [-0.17, 1.01]
##
## - Estimated using pooled SD.
effectsize::interpret_cohens_d(0.42)
## [1] "small"
## (Rules: cohen1988)
Treatment (new model) does not significantly reduced (p < 0.163, d = 0.42) the error type II ratio compared to Control (existing model)
#Estimate sample size
pwr.t.test(power = .8, # 80% power
d = 0.2, # Cohen's d
sig.level = 0.05, # threshold for p-val
type = "two.sample") # eg., this is for treatment vs control
##
## Two-sample t test power calculation
##
## n = 393.4057
## d = 0.2
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group