|
@@ -112,7 +112,7 @@ x_min <- min(min(imdb$year, na.rm = TRUE), min(groupLens$year, na.rm = TRUE), mi
|
|
|
x_max <- max(max(imdb$year, na.rm = TRUE), max(groupLens$year, na.rm = TRUE), max(netflix$year, na.rm = TRUE))
|
|
x_max <- max(max(imdb$year, na.rm = TRUE), max(groupLens$year, na.rm = TRUE), max(netflix$year, na.rm = TRUE))
|
|
|
# Define colors
|
|
# Define colors
|
|
|
|
|
|
|
|
-color <- c("blue", "red", "green")
|
|
|
|
|
|
|
+color <- rainbow(3)
|
|
|
|
|
|
|
|
imdb_year_avg <- imdb %>%
|
|
imdb_year_avg <- imdb %>%
|
|
|
group_by(year) %>%
|
|
group_by(year) %>%
|
|
@@ -135,7 +135,7 @@ yearList <- merge(imdb_year_avg, groupLens_year_avg, by = "year")
|
|
|
yearList <- merge(yearList, netflix_year_avg, by = "year")
|
|
yearList <- merge(yearList, netflix_year_avg, by = "year")
|
|
|
yearList <- mutate(yearList, mean = ((rating + rating.x + rating.y) / 3))
|
|
yearList <- mutate(yearList, mean = ((rating + rating.x + rating.y) / 3))
|
|
|
|
|
|
|
|
-png(filename=returnPath("output/1.png"), height = 400, width = 900, bg = "white")
|
|
|
|
|
|
|
+png(filename=returnPath("output/Q1.png"), height = 400, width = 900, bg = "white")
|
|
|
|
|
|
|
|
plot(yearList$rating.x,
|
|
plot(yearList$rating.x,
|
|
|
type = "l",
|
|
type = "l",
|
|
@@ -146,7 +146,8 @@ plot(yearList$rating.x,
|
|
|
xlab = "Years",
|
|
xlab = "Years",
|
|
|
ylab = "Avg. rating",
|
|
ylab = "Avg. rating",
|
|
|
cex.lab=0.8,
|
|
cex.lab=0.8,
|
|
|
- lwd=2
|
|
|
|
|
|
|
+ lwd=2,
|
|
|
|
|
+ main = "In what movie release year where the average ratings the highest?"
|
|
|
)
|
|
)
|
|
|
# Draw the axes by hand: one x tick per row of yearList, labelled with that
# row's year. seq_along() is used instead of 1:length() so an empty yearList
# yields no ticks rather than the sequence c(1, 0).
axis(1, at = seq_along(yearList$year), labels = yearList$year, pos = 0)
# `:` binds tighter than `*`, so the y ticks are 0, 2, 4, ..., 2 * y.
axis(2, las = 1, at = 2 * 0:y, pos = 1)
|
|
@@ -172,15 +173,40 @@ lines(yearList$rating,
|
|
|
# col = "yellow",
|
|
# col = "yellow",
|
|
|
# lwd = 2
|
|
# lwd = 2
|
|
|
# )
|
|
# )
|
|
|
-
|
|
|
|
|
-legend(1, 10, set_names, cex = 0.8, col = color, lty=1:3, lwd = 2, bty="n")
|
|
|
|
|
|
|
# Rank the years by their combined mean rating; the first row is the best
# year. `highest` is kept in the workspace because it is printed further down.
sorted <- arrange(yearList, desc(mean))
highest <- sorted[1, ]
highest$mean <- round(highest$mean, digits = 3)

# The x axis is labelled by row position in yearList, so the marker's x
# coordinate is the row that holds the best year. match() finds that row
# directly; counting the years up to highest$year only gives the same answer
# when yearList is sorted ascending by unique year.
highest_x <- match(highest$year, yearList$year)

# Star marker on the best year, with its rounded score just above-left of it.
points(highest_x, highest$mean, pch = 8, lwd = 2, cex = 2)
text(highest_x - 0.3, highest$mean + 0.7, labels = highest$mean)

# Caption at fixed plot coordinates (50, 10).
# NOTE(review): assumes the x axis spans at least ~50 years -- confirm.
text(
  50,
  10,
  labels = paste("The highest average rating was in:", highest$year)
)
legend(1, 3, set_names, cex = 0.8, col = color, lty = 1:3, lwd = 2, bty = "n")

# Temporary used only for positioning the marker above.
remove(highest_x)
|
|
|
|
|
|
|
|
dev.off();
|
|
dev.off();
|
|
|
|
|
|
|
|
print("In what year are the ratings the highest?")
|
|
print("In what year are the ratings the highest?")
|
|
|
|
|
|
|
|
-sorted <- arrange(yearList, desc(mean))
|
|
|
|
|
-highest <- sorted[1,]
|
|
|
|
|
print(paste("That was in:", highest$year, "Score:", highest$mean))
|
|
print(paste("That was in:", highest$year, "Score:", highest$mean))
|
|
|
|
|
|
|
|
# Cleanup
|
|
# Cleanup
|
|
@@ -189,5 +215,36 @@ remove(highest)
|
|
|
|
|
|
|
|
print("Working on question no. 2...")
|
|
print("Working on question no. 2...")
|
|
|
|
|
|
|
|
|
|
# Question 2: which provider has the highest average score?
#
# Each value is the plain mean of one rating column of yearList, rounded to
# three digits. Column mapping follows the merge order visible in this file:
# `rating.x` / `rating.y` come from merging imdb_year_avg with
# groupLens_year_avg, and the unsuffixed `rating` from the later merge with
# netflix_year_avg.
# NOTE(review): presumably these columns are per-year averages, which would
# make each value an unweighted mean of yearly means -- confirm.
netflix_ <- round(mean(yearList$rating), digits = 3)
imdb_ <- round(mean(yearList$rating.x), digits = 3)
groupLens_ <- round(mean(yearList$rating.y), digits = 3)

# Bar order: IMDb, GroupLens, Netflix.
# NOTE(review): assumes set_names lists the providers in this order -- confirm.
vct <- c(imdb_, groupLens_, netflix_)

png(
  filename = returnPath("output/Q2.png"),
  height = 500,
  width = 450,
  bg = "white"
)

# barplot() returns the x coordinate of each bar's midpoint; keep it so the
# value labels stay centred on their bars instead of relying on hard-coded
# positions that only match the default bar width and spacing.
bar_mids <- barplot(
  vct,
  col = color,
  names.arg = set_names,
  ylim = c(0, y),
  axes = TRUE,
  xlab = "Data Provider",
  ylab = "Avg. score",
  main = "Which provider has the highest average score?"
)

# One value label just above each bar, coloured like the bar.
text(bar_mids, vct + 0.2, labels = vct, col = color)

# Caption naming the winning provider, one unit below the top of the y range.
text(
  1.5,
  y - 1,
  labels = paste(
    "Provider with highest average score is:",
    set_names[which.max(vct)]
  )
)

dev.off()

# Cleanup
remove(netflix_, groupLens_, imdb_, vct, bar_mids)
|