xxxxxxxxxx
print(r.eval('''
library(tidyverse)
# Build the raw-file URLs of the two CSV tables hosted on GitHub.
user <- 'https://raw.githubusercontent.com/osanchez2323/Portfolio/master/'
folder1 <- paste0(user, 'Data%20Science%20Takes%20on%20Education/Data/')
file1 <- paste0(folder1, '2016%20School%20Explorer.csv')
file2 <- paste0(folder1, 'D5%20SHSAT%20Registrations%20and%20Testers.csv')

# Read both tables over the network.
se <- read.csv(file1)
d5 <- read.csv(file2)

# Keep columns 4 through 161 and recode the literal 'N/A' marker as missing.
se <- se[, 4:161]
se[se == 'N/A'] <- NA

# Report the dimensions of both tables on a single line.
cat(
  c(
    paste0('se => ', list(dim(se))),
    paste0('d5 => ', list(dim(d5)))
  ),
  '\n'
)

# Overwrite four school names selected by row position; the new labels
# presumably disambiguate schools that share a name (TODO confirm the row
# numbers against the current version of the CSV).
se$School.Name <- as.character(se$School.Name)
se$School.Name[c(428, 1024, 713, 909)] <- c(
  'P.S. 212 D12',
  'P.S. 212 D30',
  'P.S. 253 D21',
  'P.S. 253 D27'
)
# Names of the `se` columns handled by the cleaning steps that follow,
# grouped by the kind of value they hold.

# Percentage-style columns.
percent_list <- c(
  'Percent.ELL',
  'Percent.Asian',
  'Percent.Black',
  'Percent.Hispanic',
  'Percent.Black...Hispanic',
  'Percent.White',
  'Student.Attendance.Rate',
  'Percent.of.Students.Chronically.Absent',
  'Rigorous.Instruction..',
  'Collaborative.Teachers..',
  'Supportive.Environment..',
  'Effective.School.Leadership..',
  'Strong.Family.Community.Ties..',
  'Trust..'
)

# Proficiency columns.
target_list <- c(
  'Average.ELA.Proficiency',
  'Average.Math.Proficiency'
)

# Economic columns.
economic_list <- c(
  'Economic.Need.Index',
  'School.Income.Estimate'
)

# Rating columns.
rating_list <- c(
  'Rigorous.Instruction.Rating',
  'Collaborative.Teachers.Rating',
  'Supportive.Environment.Rating',
  'Effective.School.Leadership.Rating',
  'Strong.Family.Community.Ties.Rating',
  'Trust.Rating',
  'Student.Achievement.Rating'
)
# Convert formatted text such as '85%', '$31,141.72' or '1,234,567' to numeric.
#
# x: character vector or factor; other inputs are coerced with as.character().
# Returns a numeric vector of the same length as x. Entries that still do not
# parse as a number after stripping become NA (as.numeric() warns about them).
str2num <- function(x) {
  # Strip every '%', '$' and ',' in a single pass. A character class is used
  # because a bare '$' is the end-of-string anchor in a regular expression, so
  # sub('$', '', x) never removed dollar signs, and sub() dropped only the
  # first comma of values with several thousands separators.
  as.numeric(gsub('[%$,]', '', as.character(x)))
}
# Convert the percent, proficiency and economic columns to numeric, then fill
# their missing values from adjacent rows within the same City.
numeric_cols <- c(percent_list, target_list, economic_list)
for (el in numeric_cols) {
  # `[[` passes the column vector to str2num() and stores a plain vector back;
  # sapply() over a one-column frame produced an n x 1 matrix instead.
  se[[el]] <- str2num(se[[el]])
}
# Each column is filled independently of the others, so one grouped pass over
# all of them gives the same result as regrouping once per column.
# 'downup' fills downwards first and then upwards inside each group.
se <- se %>%
  dplyr::group_by(City) %>%
  fill(all_of(numeric_cols), .direction = 'downup') %>%
  dplyr::ungroup()
# Replace missing ratings with the most frequent rating of the same column.
for (el in rating_list) {
  # Tabulate the column vector itself and read the level names via names().
  # This avoids depending on a `Var1` column, which data.frame(table(...))
  # only creates when the table carries no dimension name.
  rating_counts <- table(se[[el]])
  # A column without any observed rating has no mode to impute.
  if (length(rating_counts) == 0L) next
  # which.max() yields exactly one position, so a tie resolves to the first
  # level rather than producing several replacement values.
  mvalue <- names(rating_counts)[which.max(rating_counts)]
  se[[el]][is.na(se[[el]])] <- mvalue
}
# Count of cells that are still missing; kept as the final expression so its
# value is what the enclosing r.eval() call reports.
sum(is.na(se))
'''))
xxxxxxxxxx
print(r.eval('''
# Scatter map of schools by longitude/latitude. Three point layers are drawn,
# one per population group, with point size encoding that group's percentage.
options(bitmapType = 'cairo')
fn <- 'Rplots.png'
# NOTE(review): the png() device opened here and ggsave() further down both
# target `fn`; confirm which image is expected to remain after dev.off().
png(fn, pointsize = 12, bg = 'whitesmoke')
theme_set(theme_bw())
cols <- c('#3636ff', '#ff3636', '#6f6fff', '#ff6f6f')
data <- data.frame(
  x = c(se$Longitude),
  y = c(se$Latitude),
  asian = c(se$Percent.Asian),
  white = c(se$Percent.White),
  black = c(se$Percent.Black)
)
# Colour assigned to each group label used in the layers below.
vc <- c(asian = cols[2], white = 'white', black = cols[1])
# The plot-level mapping holds only the coordinates: every layer supplies its
# own colour label, and `data` has no `variable` column to map colour from.
ggplot(data, aes(x = x, y = y)) +
  geom_point(aes(size = black, col = 'black')) +
  # alpha is a fixed setting, so it sits outside aes(); inside aes() the
  # constant was mapped through an alpha scale instead of meaning 40% opacity.
  geom_point(aes(size = asian, col = 'asian'), alpha = .4) +
  geom_point(aes(size = white, col = 'white'), alpha = .4) +
  scale_size(range = c(0, 1)) +
  scale_colour_manual(name = 'groups', values = vc) +
  guides(size = guide_legend(title = 'percent')) +
  ggtitle('Distribution by Population Groups') +
  labs(x = 'longitude', y = 'latitude') +
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` in
  # element_line(); `size` is kept for compatibility with older installs.
  theme(
    axis.text.x = element_text(size = 3, vjust = .5),
    axis.text.y = element_text(size = 3, hjust = .5),
    legend.text = element_text(size = 3),
    legend.key.size = unit(.2, 'cm'),
    legend.key = element_rect(color = 'slategray'),
    legend.position = 'top',
    panel.grid.major = element_line(color = 'whitesmoke', size = .1),
    panel.grid.minor = element_line(color = 'gray80', size = .05),
    title = element_text(size = 4, hjust = .5),
    panel.background = element_rect(fill = 'slategray')
  )
ggsave(fn, width = 2.0, height = 2.4)
dev.off()
'''))
No comments:
Post a Comment