# Locations of the two raw CSV files in the GitHub repository.
user <- 'https://raw.githubusercontent.com/osanchez2323/Portfolio/master/'
folder1 <- paste0(user, 'Data%20Science%20Takes%20on%20Education/Data/')
file1 <- paste0(folder1, '2016%20School%20Explorer.csv')
file2 <- paste0(folder1, 'D5%20SHSAT%20Registrations%20and%20Testers.csv')

# Read both tables straight from their URLs.
se <- read.csv(file1)
d5 <- read.csv(file2)

# Keep only columns 4 through 161 of `se`, then turn the literal 'N/A'
# marker into a real missing value.
se <- se[, 4:161]
se[se == 'N/A'] <- NA

# Report the dimensions of both tables on a single line.
cat(
  paste0('se => ', list(dim(se))),
  paste0('d5 => ', list(dim(d5))),
  '\n'
)
# Store School.Name as plain character and overwrite four entries, addressed
# by row position, with names that carry a 'D<number>' suffix.
# NOTE(review): presumably the suffix tells apart schools that share a name;
# the row positions are tied to the current ordering of the CSV -- confirm.
se$School.Name <- replace(
  as.character(se$School.Name),
  c(428, 1024, 713, 909),
  c('P.S. 212 D12', 'P.S. 212 D30', 'P.S. 253 D21', 'P.S. 253 D27')
)
# Columns that are parsed from text to numeric further down in the script.
# The vector has to be bound to `percent_list`: the numeric-conversion loop
# reads that name, and a bare c(...) expression would simply be discarded.
percent_list <- c(
  'Percent.ELL', 'Percent.Asian', 'Percent.Black',
  'Percent.Hispanic', 'Percent.Black...Hispanic', 'Percent.White',
  'Student.Attendance.Rate', 'Percent.of.Students.Chronically.Absent',
  'Rigorous.Instruction..', 'Collaborative.Teachers..',
  'Supportive.Environment..', 'Effective.School.Leadership..',
  'Strong.Family.Community.Ties..', 'Trust..'
)
# Names of the two average-proficiency columns.
target_list <- c(
  'Average.ELA.Proficiency',
  'Average.Math.Proficiency'
)

# Names of the two economic columns.
economic_list <- c(
  'Economic.Need.Index',
  'School.Income.Estimate'
)
# Rating columns; their missing entries are replaced by the most frequent
# value of each column in the second loop below.
rating_list <- c(
  'Rigorous.Instruction.Rating', 'Collaborative.Teachers.Rating',
  'Supportive.Environment.Rating', 'Effective.School.Leadership.Rating',
  'Strong.Family.Community.Ties.Rating', 'Trust.Rating',
  'Student.Achievement.Rating'
)

# Convert formatted text such as '85%' or '$1,234.50' to numeric.
#
# x: character (or factor) vector.
# Returns a numeric vector of the same length; entries that still do not
# parse after stripping become NA (with the usual coercion warning).
str2num <- function(x) {
  x <- as.character(x)
  for (s in c('%', '$', ',')) {
    # fixed = TRUE is required: as a regular expression '$' is the
    # end-of-string anchor and would never remove a literal dollar sign.
    # gsub() drops every thousands separator, not only the first one.
    x <- gsub(s, '', x, fixed = TRUE)
  }
  as.numeric(x)
}

# Numeric columns: parse the text, then fill the remaining gaps from other
# rows of the same City (downward first, then upward).
for (el in c(percent_list, target_list, economic_list)) {
  # `[[` assigns a plain numeric vector; sapply() over a one-column frame
  # would hand back a matrix instead.
  se[[el]] <- str2num(se[[el]])
  se <- se %>%
    dplyr::group_by(City) %>%
    fill(all_of(el), .direction = 'downup') %>%
    dplyr::ungroup()
}

# Rating columns: replace missing entries with the most frequent value.
for (el in rating_list) {
  # table() leaves NA out of the tally by default.
  counts <- table(se[[el]])
  # Skip columns with no observed value at all -- there is no mode to use.
  if (length(counts) > 0 && max(counts) > 0) {
    # which.max() yields exactly one value even when several are tied.
    mvalue <- names(counts)[which.max(counts)]
    se[[el]][is.na(se[[el]])] <- mvalue
  }
}