# main.R 7.5 KB
  1. library(readr)
  2. library(dplyr)
  3. library(stringr)
  4. library(parallel)
  # Number of consecutive words joined into one word group by the training
  # and scoring functions.
  SENSITIVITY <- 4

  # Global MAX is fixed at 500; the data-driven alternative is kept disabled.
  # NOTE(review): presumably a record cap for code outside this section;
  # the functions below define their own local MAX - confirm before removing.
  # MAX <- nrow(DATA1)
  MAX <- 500

  # Locations of the persisted word-group tables, resolved against the
  # working directory at the moment this script is evaluated.
  LEARNED.POSITIVE <- file.path(getwd(), "learned_positive.csv")
  LEARNED.NEGATIVE <- file.path(getwd(), "learned_negative.csv")
  #' Learn the word groups of one labelled text.
  #'
  #' Upper-cases `str`, splits it into words with `sentiment.split()` and
  #' slides a window of `SENSITIVITY` words over the result. Every window
  #' (joined with single spaces) is counted in the global `positive` table
  #' when `sen == 1`, otherwise in the global `negative` table. Both tables
  #' are data frames with the columns `cmb` (word group) and `cnt` (count).
  #'
  #' Groups are looked up by exact equality. The previous substring/regex
  #' lookup could match a longer stored group, which either lost the new
  #' group or inserted a duplicate row for it.
  #'
  #' @param str Text to learn from.
  #' @param sen Label of the text; `1` selects the positive table, any other
  #'   value the negative table.
  #' @return `NULL`, invisibly. Called for its side effect on `.GlobalEnv`.
  sentiment.train <- function(str, sen) {
    env <- .GlobalEnv
    words <- sentiment.split(toupper(str))
    n_groups <- length(words) - SENSITIVITY + 1
    if (n_groups < 1) {
      cat("You have to train me with more text.\n")
      return(invisible(NULL))
    }

    table_name <- if (sen == 1) "positive" else "negative"
    learned <- env[[table_name]]
    if (!is.data.frame(learned)) {
      stop(
        "The '", table_name, "' table does not exist in the global ",
        "environment; create or load it before training.",
        call. = FALSE
      )
    }

    # Work on a local copy and write it back once, instead of re-reading and
    # re-assigning the global table for every word group.
    for (i in seq_len(n_groups)) {
      grp <- paste(words[i:(i + SENSITIVITY - 1)], collapse = " ")
      hit <- which(learned$cmb == grp)
      if (length(hit) > 0) {
        # Only the first row is incremented should the table already contain
        # duplicates of this group.
        learned$cnt[hit[1]] <- learned$cnt[hit[1]] + 1
      } else {
        learned[nrow(learned) + 1, ] <- list(grp, 1)
      }
    }
    env[[table_name]] <- learned
    invisible(NULL)
  }
  #' Interactively score the classifier against a labelled data set.
  #'
  #' Lets the user pick a file, imports it with the global `set.import()`,
  #' optionally narrows the range of records, classifies the `review` column
  #' of every selected record with `sentiment.calc()` and compares the result
  #' with the `sentiment` column.
  #'
  #' @return The share of correctly classified records as an integer
  #'   percentage (truncated, not rounded).
  sentiment.test <- function() {
    env <- .GlobalEnv
    cat("Ohh so you want to test me?\n")
    cat("Well come on then. Let's do this!\n\n")
    cat("First of all. Can you give me the test(set)?\n")
    set <- env$set.import(file.choose())

    total <- nrow(set)
    if (is.null(total) || total < 1) {
      # Without this guard the loop below would run over 1:0.
      stop("The chosen test set could not be read or has no records.",
           call. = FALSE)
    }

    first <- 1
    last <- total
    ask_all <- paste("Do you want me to test all of the",
                     as.character(total), "records?")
    if (!console.confirm(ask_all)) {
      cat("Well thanks that might just saved me a huge headache.\n")
      repeat {
        first <- console.ask("So where do you want me to start?",
                             type = "integer")
        if (first > 0 && first <= total) break
        cat("Please enter a number bigger than 0 and smaller or equal than the records in this set.\n")
      }
      repeat {
        last <- console.ask("And where do you want me to stop?",
                            type = "integer")
        if (last >= first && last <= total) break
        # The trailing newline keeps the next prompt on its own line.
        cat("Please enter a number bigger or equal then ", first,
            " and smaller or equal then ", total, "\n", sep = "")
      }
    }

    cat("*Intensive thinking* Hmmmm...\n")
    progress <- txtProgressBar(min = (first - 1), max = last, style = 3)
    # Also close the bar when sentiment.calc() aborts on a too-short review.
    on.exit(close(progress), add = TRUE)
    setTxtProgressBar(progress, (first - 1))

    # Preallocated instead of growing the result with c() on every record.
    hits <- logical(last - first + 1)
    for (i in first:last) {
      predicted <- sentiment.calc(set$review[[i]], progress = FALSE)
      hits[i - first + 1] <- predicted == as.integer(set$sentiment[[i]])
      setTxtProgressBar(progress, i)
    }
    close(progress)

    cat("Phoee... Finally done. Hope I did well...\n")
    score <- as.integer(mean(hits) * 100)
    if (score > 80) {
      cat(paste0("OMG! I got ", as.character(score), "% correct!\n"))
    } else {
      cat(paste0("Hmm. I'm not happy with a score of ",
                 as.character(score), "%\n"))
    }
    score
  }
  #' Classify a text as positive or negative.
  #'
  #' Upper-cases `str`, splits it with `sentiment.split()` and slides a
  #' window of `SENSITIVITY` words over the result. For every window the
  #' counts stored under that exact group in the global `positive` and
  #' `negative` tables (columns `cmb` and `cnt`) are added up.
  #'
  #' The lookup compares against the `cmb` column only. Comparing the whole
  #' table with the group yields a logical matrix used as a row index, which
  #' is not a valid row subscript for tibbles such as those returned by
  #' `read_csv()`. Counts of duplicated rows for one group are summed; a
  #' missing table contributes nothing.
  #'
  #' @param str Text to classify.
  #' @param progress When `TRUE`, show a progress bar and print the verdict.
  #' @return `TRUE` when the positive score is at least the negative score,
  #'   otherwise `FALSE`. Stops with an error when the text has fewer than
  #'   `SENSITIVITY` words.
  sentiment.calc <- function(str, progress = TRUE) {
    env <- .GlobalEnv
    words <- sentiment.split(toupper(str))
    n_groups <- length(words) - SENSITIVITY + 1
    if (n_groups < 1) {
      stop("I really need some more text to figure this one out.\n")
    }

    # Pull the columns out once; they do not change while scoring.
    pos_cmb <- env$positive$cmb
    pos_cnt <- env$positive$cnt
    neg_cmb <- env$negative$cmb
    neg_cnt <- env$negative$cnt

    if (progress) {
      cat("Let me think...\n")
      prog <- txtProgressBar(0, n_groups, style = 3)
    }

    pos <- 0
    neg <- 0
    for (i in seq_len(n_groups)) {
      grp <- paste(words[i:(i + SENSITIVITY - 1)], collapse = " ")
      # which() drops NA comparisons, so NA groups never match.
      pos <- pos + sum(pos_cnt[which(pos_cmb == grp)])
      neg <- neg + sum(neg_cnt[which(neg_cmb == grp)])
      if (progress) setTxtProgressBar(prog, i)
    }

    is_positive <- pos >= neg
    if (progress) {
      close(prog)
      if (is_positive) {
        cat("This must be a POSITIVE review\n")
      } else {
        cat("This must be a NEGATIVE review\n")
      }
    }
    is_positive
  }
  #' Split text into words.
  #'
  #' Every run of characters other than the ASCII letters a-z and A-Z is a
  #' separator. Missing values and empty tokens are dropped; `strsplit()`
  #' produces an empty first token whenever the text starts with a separator
  #' (for example a quote or a digit), which would otherwise become a bogus
  #' "word".
  #'
  #' @param str Character vector, or a list of strings, to split.
  #' @return A plain character vector of words, possibly of length zero.
  sentiment.split <- function(str) {
    tokens <- as.character(unlist(strsplit(unlist(str), "[^a-zA-Z]+")))
    tokens[!is.na(tokens) & nzchar(tokens)]
  }
  #' Write the learned word-group tables to disk.
  #'
  #' Saves the `positive` and `negative` tables from the global environment,
  #' which is where `sentiment.train()` maintains them, to `LEARNED.POSITIVE`
  #' and `LEARNED.NEGATIVE`. Reading them from the caller's frame instead
  #' would save the wrong objects whenever this function is called from
  #' inside another function. An existing file is only overwritten after
  #' the user confirms.
  #'
  #' @return `NULL`, invisibly.
  learn.save <- function() {
    env <- .GlobalEnv
    # Check both tables first so a missing one cannot leave a half-saved state.
    if (is.null(env$positive) || is.null(env$negative)) {
      stop("There is nothing to save yet: no learned tables were found.",
           call. = FALSE)
    }

    if (!file.exists(LEARNED.POSITIVE) ||
        console.confirm("Positive learned file already exists. Overwrite?")) {
      write_csv(env$positive, LEARNED.POSITIVE)
    }
    if (!file.exists(LEARNED.NEGATIVE) ||
        console.confirm("Negative learned file already exists. Overwrite?")) {
      write_csv(env$negative, LEARNED.NEGATIVE)
    }
    invisible(NULL)
  }
  #' Offer to restore previously saved word-group tables.
  #'
  #' When both `LEARNED.POSITIVE` and `LEARNED.NEGATIVE` exist and the user
  #' agrees, the files are read into the global `positive` and `negative`
  #' tables, which is where `sentiment.train()` and `sentiment.calc()` look
  #' for them.
  #'
  #' The column types are fixed (`cmb` character, `cnt` double) rather than
  #' guessed, and the result is converted to a base data frame so it has the
  #' same class as a freshly created table.
  #'
  #' @return `TRUE` when the tables were loaded, otherwise `FALSE`.
  learn.load <- function() {
    env <- .GlobalEnv
    if (!(file.exists(LEARNED.POSITIVE) && file.exists(LEARNED.NEGATIVE))) {
      return(FALSE)
    }
    if (!console.confirm("I found out that I already learned something a while ago. Do you want to use that data?")) {
      return(FALSE)
    }
    env$positive <- as.data.frame(read_csv(LEARNED.POSITIVE, col_types = "cd"))
    env$negative <- as.data.frame(read_csv(LEARNED.NEGATIVE, col_types = "cd"))
    TRUE
  }
  #' Ask a yes/no question on the console.
  #'
  #' Repeats the prompt until the answer is `Y` or `N`. Answers are trimmed
  #' and compared case-insensitively, so `y` and ` n ` are accepted as well.
  #'
  #' @param str Question shown to the user; `" [Y|N]: "` is appended.
  #' @return `TRUE` for yes, `FALSE` for no.
  console.confirm <- function(str) {
    # readline() returns "" immediately outside interactive sessions, which
    # would make the loop below spin forever.
    if (!interactive()) {
      stop("console.confirm() needs an interactive session to read an answer.",
           call. = FALSE)
    }
    repeat {
      ans <- toupper(trimws(readline(prompt = paste(str, "[Y|N]: "))))
      if (identical(ans, "Y")) return(TRUE)
      if (identical(ans, "N")) return(FALSE)
      cat("Enter Y or N. Let's try it again.\n")
    }
  }
  #' Ask the user for a value on the console.
  #'
  #' @param str Question shown to the user; the expected type is appended in
  #'   square brackets.
  #' @param type Either `"string"` (the raw answer is returned) or
  #'   `"integer"` (the prompt repeats until the answer consists of digits
  #'   only and fits into an R integer).
  #' @return The answer as a character string or as an integer.
  console.ask <- function(str, type = "string") {
    # An unsupported type used to loop forever; reject it before prompting.
    type <- match.arg(type, c("string", "integer"))
    if (type == "integer" && !interactive()) {
      # readline() yields "" outside interactive sessions, so the validation
      # loop could never terminate.
      stop("console.ask() needs an interactive session to read an integer.",
           call. = FALSE)
    }
    repeat {
      ans <- readline(prompt = paste0(str, " [", type, "]: "))
      if (type == "string") {
        return(ans)
      }
      ans <- trimws(ans)
      if (grepl("^[0-9]+$", ans)) {
        # Digit strings beyond the integer range convert to NA; ask again
        # instead of handing NA to the caller.
        value <- suppressWarnings(as.integer(ans))
        if (!is.na(value)) {
          return(value)
        }
      }
      cat(paste(type, "only please!", "\n"))
    }
  }
  #' Read a data set from a tab- or comma-separated file.
  #'
  #' The reader is chosen from the file extension, compared without regard
  #' to case: `.tsv` is read with `read_delim()` using a tab delimiter and
  #' backslash escapes, `.csv` with `read_csv()`. Any other extension is an
  #' error rather than a silent `NULL`, which callers cannot use.
  #'
  #' @param fullPath Path of the file to read.
  #' @return The imported table as returned by the readr reader.
  set.import <- function(fullPath) {
    extension <- tolower(tools::file_ext(fullPath))
    if (identical(extension, "tsv")) {
      return(read_delim(fullPath, "\t", escape_backslash = TRUE,
                        escape_double = FALSE, trim_ws = TRUE))
    }
    if (identical(extension, "csv")) {
      return(read_csv(fullPath, trim_ws = TRUE))
    }
    stop("Unsupported file type: '", fullPath,
         "'. Expected a .tsv or .csv file.", call. = FALSE)
  }
  #' Interactive training session.
  #'
  #' Asks whether to train, prepares the global `positive` and `negative`
  #' tables (kept when they exist and the user wants to append, otherwise
  #' created empty), lets the user pick a data set and a record range, feeds
  #' every selected record to `sentiment.train()` and finally offers to save
  #' the result with `learn.save()`.
  #'
  #' The tables are created in the global environment because that is where
  #' `sentiment.train()` reads and updates them; tables created as local
  #' variables here would never be seen by the training step.
  #'
  #' @return `NULL`, invisibly.
  learn.teach <- function() {
    env <- .GlobalEnv
    if (console.confirm("Do you want to train me so I can be better?")) {
      empty_table <- function() {
        data.frame(cmb = character(), cnt = integer(),
                   stringsAsFactors = FALSE)
      }

      keep_existing <- FALSE
      if (exists("positive", envir = env, inherits = FALSE) &&
          exists("negative", envir = env, inherits = FALSE)) {
        cat("Hmmm... I already know someting.\n")
        keep_existing <- console.confirm("Do you want me to continue to learn? (Append learning skillset)")
      }
      if (!keep_existing) {
        env$positive <- empty_table()
        env$negative <- empty_table()
      }

      set <- set.import(file.choose())
      total <- nrow(set)
      if (is.null(total) || total < 1) {
        # Without this guard the training loop would run over 1:0.
        stop("The chosen training set could not be read or has no records.",
             call. = FALSE)
      }

      first <- 1
      last <- total
      ask_all <- paste("Do you want me to learn all of the",
                       as.character(total), "records?")
      if (!console.confirm(ask_all)) {
        cat("Well thanks that might just saved me a huge headache.\n")
        repeat {
          first <- console.ask("So where do you want me to start?",
                               type = "integer")
          if (first > 0 && first <= total) break
          cat("Please enter a number bigger than 0 and smaller or equal than ",
              as.character(total), "\n")
        }
        repeat {
          last <- console.ask("And where do you want me to stop?",
                              type = "integer")
          if (last >= first && last <= total) break
          # The trailing newline keeps the next prompt on its own line.
          cat("Please enter a number bigger or equal then ", first,
              " and smaller or equal then ", total, "\n", sep = "")
        }
      }

      cat("Getting smarter...\n")
      progress <- txtProgressBar((first - 1), last, style = 3)
      setTxtProgressBar(progress, (first - 1))
      for (i in first:last) {
        sentiment.train(set$review[[i]], as.integer(set$sentiment[[i]]))
        setTxtProgressBar(progress, i)
      }
      close(progress)

      if (console.confirm("Let me catch some breath here. Do you want me to remeber this training?")) {
        # Expose the trained tables in this frame as well, so learn.save()
        # finds them whether it looks in its caller's frame or in the global
        # environment.
        positive <- env$positive
        negative <- env$negative
        learn.save()
      }
    }
    cat("Now that I know everything. There is one thing you should learn.\n")
    cat("If you want me to analyse a review just call:\n\n")
    cat("sentiment.calc(<any text>)\n\n")
    cat("Now let's get started!\n")
    invisible(NULL)
  }