2015. 10. 16. 18:02ㆍ서버 프로그래밍
광주정보문화산업진흥원
강의시간 : 30시간
강의교재 : 하둡 완벽 가이드
-------------------------------------------------------------
www.oracle.com
jdk-8u60-windows-x64.exe 다운로드
http://www.cygwin.com/
setup-x86_64.exe (64-bit installation). 다운로드
https://archive.apache.org/dist/hadoop/core/hadoop-0.21.0/
hadoop-0.21.0.tar.gz (71M) 다운로드
http://www.eclipse.org/
Eclipse IDE for Java Developers
eclipse-java-mars-1-win32-x86_64.zip 다운로드
--------------------------------------------------------------
설치 폴더 : D:\cygwin64
설치 파일 다운로드 폴더 : d:\cygwin_local_packages
환경변수 - 시스템 변수 Path 편집 (기존 값 끝에 아래 경로 추가)
...........;d:\cygwin64\bin;d:\cygwin64\usr\sbin
<Hadoop 설치>
$ tar xvfz hadoop-0.21.0.tar.gz
$ ln -s hadoop-0.21.0 hadoop
<SSH 설정>
$ ssh-host-config
*** Query: Should StrictModes be used? (yes/no)
-->no
*** Query: Should privilege separation be used? (yes/no)
-->no
*** Query: Do you want to install sshd as a service?
*** Query: (Say "no" if it is already installed as a service) (yes/no)
-->yes
*** Query: Enter the value of CYGWIN for the daemon: []
-->yes
*** Query: Do you want to use a different name? (yes/no)
-->no
*** Query: Create new privileged user account '405-188\cyg_server' (Cygwin name: 'cyg_server')? (yes/no)
-->yes
*** Query: Please enter the password:
*** Query: Reenter:
$ net start sshd
$ cd ~
$ ssh-keygen
$ cd .ssh
$ cat id_rsa.pub >> authorized_keys
$ cd ~/hadoop/conf
$ vi hadoop-env.sh
export JAVA_HOME=/cygdrive/d/jdk1.8.0_60
$ vi core-site.xml
<property>
<name>fs.default.name</name>
<value>hdfs://127.0.0.1:9000</value>
</property>
$ vi hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
$ vi mapred-site.xml
<property>
<name>mapred.job.tracker</name>
<value>127.0.0.1:9001</value>
</property>
$ cd ~/hadoop
$ ./bin/hadoop namenode -format
$ ./bin/start-all.sh
$ ./bin/stop-all.sh
$ ./bin/hadoop-daemon.sh start namenode
$ ./bin/hadoop-daemon.sh start secondarynamenode
$ ./bin/hadoop-daemon.sh start datanode
$ ./bin/hadoop-daemon.sh start jobtracker
$ ./bin/hadoop-daemon.sh start tasktracker
$ vi input.txt
$ ./bin/hadoop fs -put input.txt input.txt
$ ./bin/hadoop fs -ls
$ ./bin/hadoop jar gitct-wordcount.jar kr.gitct.wordcount.WordCount input.txt wordcount_output
$ ./bin/hadoop fs -cat wordcount_output/part-r-00000
http://127.0.0.1:50030/jobtracker.jsp
------------------------------
$ ./bin/hadoop fs -put airline airline
$ ./bin/hadoop fs -ls airline
$ ./bin/hadoop jar gitct-delaycount.jar kr.gitct.delaycount.DelayCount airline delaycount_output
$ ./bin/hadoop fs -cat delaycount_output/part-r-00000
-------------------------------------------------------------------------------------------
https://www.r-project.org/
1)http://cran.nexr.com/ 접속
2)Download R for Windows 클릭
3)install R for the first time. 클릭
4)Download R 3.2.2 for Windows 클릭
----------------------------
1)www.db-expert.net
2)"R까기 책관련 예제 데이터모음" 클릭
3)첨부파일 (스크립트 및 원본 데이터 모음) 다운로드
----------------------------------
> setwd("d:/temp/r_temp")
> install.packages("KoNLP")
> install.packages("wordcloud")
> library(KoNLP)
> library(wordcloud)
> useSejongDic()
> mergeUserDic(data.frame("주상절리","ncn"))
> mergeUserDic(data.frame("협재해변","ncn"))
> mergeUserDic(data.frame("성산일출봉","ncn"))
mergeUserDic(data.frame("섭지코지","ncn"))
mergeUserDic(data.frame("천지연폭포","ncn"))
mergeUserDic(data.frame("우도","ncn"))
mergeUserDic(data.frame("산방산","ncn"))
mergeUserDic(data.frame("중문관광단지","ncn"))
mergeUserDic(data.frame("잠수함","ncn"))
mergeUserDic(data.frame("러브랜드","ncn"))
mergeUserDic(data.frame("용두암","ncn"))
mergeUserDic(data.frame("신비의도로","ncn"))
mergeUserDic(data.frame("한라산","ncn"))
mergeUserDic(data.frame("오설록","ncn"))
mergeUserDic(data.frame("유리의성","ncn"))
mergeUserDic(data.frame("한림공원","ncn"))
mergeUserDic(data.frame("용머리해안","ncn"))
mergeUserDic(data.frame("해수욕장","ncn"))
mergeUserDic(data.frame("중문","ncn"))
mergeUserDic(data.frame("제주민속촌","ncn"))
mergeUserDic(data.frame("외돌개","ncn"))
mergeUserDic(data.frame("에코랜드","ncn"))
> txt <- readLines("jeju.txt")
> place <- sapply(txt,extractNoun,USE.NAMES=F)
> head(unlist(place),30)
> c <- unlist(place)
> place <- Filter(function(x) { nchar(x) >= 2 }, c)
place <- gsub("제주","",place)
place <- gsub("통운","",place)
place <- gsub("전국","",place)
place <- gsub("체인","",place)
place <- gsub("업체","",place)
place <- gsub("질문","",place)
place <- gsub("가격","",place)
place <- gsub("무난","",place)
place <- gsub("여행","",place)
place <- gsub("검색","",place)
place <- gsub("코스","",place)
place <- gsub("숙소","",place)
place <- gsub("준비","",place)
place <- gsub("다운로드","",place)
place <- gsub("조회수","",place)
place <- gsub("추천수","",place)
place <- gsub("추천","",place)
place <- gsub("답변수","",place)
place <- gsub("첫째날","",place)
place <- gsub("첫쨋날","",place)
place <- gsub("좋구요","",place)
place <- gsub("이런거","",place)
place <- gsub("둘째날","",place)
place <- gsub("셋째날","",place)
place <- gsub("세쨋날","",place)
place <- gsub("토요일","",place)
place <- gsub("일요일","",place)
place <- gsub("시간","",place)
place <- gsub("항공","",place)
place <- gsub("관광지","",place)
place <- gsub("입장료","",place)
place <- gsub("저가","",place)
place <- gsub("항공사","",place)
place <- gsub("도움","",place)
place <- gsub("대략","",place)
place <- gsub("요금","",place)
place <- gsub("\\-","",place)
place <- gsub("이용","",place)
place <- gsub("공항","",place)
place <- gsub("해안","",place)
place <- gsub("드라이브","",place)
place <- gsub("경유","",place)
place <- gsub("바다","",place)
place <- gsub("전망","",place)
place <- gsub("하루","",place)
place <- gsub("렌트카","",place)
place <- gsub("하시","",place)
place <- gsub("예약","",place)
place <- gsub("사진","",place)
place <- gsub("위치","",place)
place <- gsub("필요","",place)
place <- gsub("할인","",place)
place <- gsub("출발","",place)
place <- gsub("가능","",place)
place <- gsub("소요","",place)
place <- gsub("일정","",place)
place <- gsub("하게","",place)
place <- gsub("근처","",place)
place <- gsub("중간","",place)
place <- gsub("다양","",place)
place <- gsub("첫날","",place)
place <- gsub("도착","",place)
place <- gsub("용머","",place)
place <- gsub("리","",place)
place <- gsub("바위","",place)
place <- gsub("유명","",place)
place <- gsub("정도","",place)
place <- gsub("이동","",place)
place <- gsub("무료","",place)
place <- gsub("용머","",place)
place <- gsub("체험","",place)
place <- gsub("둘째","",place)
place <- gsub(" ","",place)
place <- gsub("\\d+","",place)
> write(unlist(place),"jeju_2.txt")
> rev <- read.table("jeju_2.txt")
> nrow(rev)
[1] 1539
> wordcount <- table(rev)
> head(sort(wordcount,decreasing=T),30)
--------------------------------
> library(RColorBrewer)
> palete <- brewer.pal(9,"Set1")
> wordcloud(names(wordcount),freq=wordcount,scale=c(5,1),
+ rot.per=0.25,min.freq=1,random.order=F,random.color=T,colors=palete)
> savePlot("jeju.png",type="png")
---------------------------------
> a <- head(sort(wordcount,decreasing=T),10)
> pie(a)
> pie(a,col=rainbow(10),radius=1)
> pct <- round(a/sum(a) * 100,1)
> lab <- paste(names(a),"\n",pct,"%")
> pie(a,main="제주도 추천 코스",col=rainbow(10),
+ cex=0.8,labels=lab)
> lab2 <- paste(names(a),"\n",pct,"% (",a,"건)")
> pie(a,main="제주도 추천 코스",col=rainbow(10),
+ cex=0.8,labels=lab2)
> par(new=T)
> pie(a,radius=0.6,col="white",labels=NA,border=NA)
> savePlot("donut_1.png",type="png")
-----------------------------------
> b <- head(sort(wordcount,decreasing=T),10)
> pct <- round(b/sum(b)*100,1)
> bp <- barplot(b, main="제주도 추천 여행지 TOP 10",
+ col=rainbow(10),cex.names=0.7,las=2,ylim=c(0,25))
> text(x=bp,y=b*1.05,labels=paste("(",pct,"%",")"),col="black",cex=0.7)
> text(x=bp,y=b*0.95,labels=paste(b,"건"),col="black",cex=0.7)
> bp <- barplot(b, main="제주 여행 코스",col=rainbow(10),
+ xlim=c(0,25),cex.names=0.7,las=1,horiz=T)
> text(y=bp,x=b*0.9,labels=paste(b,"건"),col="black",cex=0.7)
> text(y=bp,x=b*1.15,labels=paste("(",pct,"%",")"),col="black",cex=0.7)
--------------------------------------------
> txt <- readLines("propose.txt")
> pro <- sapply(txt,extractNoun,USE.NAMES=F)
> c <- unlist(pro)
> pro <- Filter(function(x) { nchar(x) >= 2 },c)
pro <- gsub("프로포즈","",pro)
pro <- gsub("propose","",pro)
pro <- gsub("선물","",pro)
pro <- gsub("조회수","",pro)
pro <- gsub("조회","",pro)
pro <- gsub("\\.","",pro)
pro <- gsub("사회","",pro)
pro <- gsub("사람","",pro)
pro <- gsub("생각","",pro)
pro <- gsub("준비","",pro)
pro <- gsub("연애","",pro)
pro <- gsub("패션","",pro)
pro <- gsub("방법","",pro)
pro <- gsub("추천수","",pro)
pro <- gsub("\\n","",pro)
pro <- gsub("\\d+","",pro)
pro <- gsub("남자","",pro)
pro <- gsub("가족","",pro)
pro <- gsub("친구","",pro)
pro <- gsub("답변","",pro)
pro <- gsub("추천","",pro)
pro <- gsub("특별","",pro)
pro <- gsub("생활","",pro)
pro <- gsub("결혼","",pro)
pro <- gsub("하시","",pro)
pro <- gsub("조언","",pro)
pro <- gsub("그룹","",pro)
pro <- gsub("하게","",pro)
pro <- gsub("여자","",pro)
pro <- gsub("장소","",pro)
pro <- gsub("감동","",pro)
pro <- gsub("커플","",pro)
pro <- gsub("행사","",pro)
pro <- gsub("성공","",pro)
pro <- gsub("시간","",pro)
pro <- gsub("감사","",pro)
pro <- gsub("기억","",pro)
pro <- gsub("누나","",pro)
pro <- gsub("문화","",pro)
pro <- gsub("정치","",pro)
pro <- gsub("질문","",pro)
> head(unlist(pro),20)
> write(unlist(pro),"pro_3.txt")
> rev <- read.table("pro_3.txt")
> nrow(rev)
[1] 839
> wordcount <- table(rev)
> head(sort(wordcount,decreasing=T),20)
> recommand <- head(sort(wordcount,decreasing=T),10)
> barplot(recommand,main="프로포즈 선물 TOP 10",col=rainbow(10),
+ space=0.8,ylim=c(0,60),cex.names=0.7,las=2)
-----------------------------
> plot(recommand,xlab="",ylab="",ylim=c(0,60),axes=FALSE,type="o",
+ col="red",main="프로포즈 선물 TOP 10",lwd=2)
> axis(1,at=1:10,lab=names(recommand),las=2)
> axis(2,las=1)
> abline(h=seq(0,60,5),v=seq(1,10,1),col="gray",lty=2)
-----------------------------
> install.packages("plotrix")
> library(plotrix)
> th_pct <- round(recommand/sum(recommand)*100,1)
> th_names <- names(recommand)
> th_labels <- paste(th_names,"\n","(",th_pct,")")
> pie3D(recommand,main="프로포즈 선물 TOP 10",col=rainbow(10),
+ cex=0.7,labels=th_labels,explode=0.05)
-------------------------------
> install.packages("ggmap")
> library(ggmap)
> loc <- read.csv("지역별장애인도서관정보.csv",header=T)
> kor <- get_map("seoul",zoom=11,maptype="roadmap")
> kor.map <- ggmap(kor)+geom_point(data=loc,aes(x=LON,y=LAT),
+ size=5,alpha=0.7)
> kor.map + geom_text(data=loc,aes(x=LON,y=LAT+0.01,label=자치구명),size=3)
--------------------------------
> library(ggmap)
> library(grid)
> pop <- read.csv("지역별인구현황_2014_4월기준.csv",header=T)
> lon <- pop$LON
> lat <- pop$LAT
> data <- pop$총인구수
> df <- data.frame(lon,lat,data)
> map1 <- get_map("Jeonju",zoom=7,maptype='roadmap')
> map1 <- ggmap(map1)
> map1 + geom_point(aes(x=lon,y=lat,colour=data,size=data),data=df)
------------------------------------------
> gwangju <- read.csv("gwangju.txt",header=T)
> gwangju
자치구 인구 LAT LON
1 동구 101454 35.14614 126.9231
2 서구 309732 35.15197 126.8903
3 남구 222105 35.13297 126.9024
4 북구 452610 35.17406 126.9119
5 광산구 408402 35.13952 126.7937
> map5 <- get_map("Gwangju",zoom=12,maptype='roadmap')
> map5 <- ggmap(map5)
> map5 + geom_point(aes(x=LON,y=LAT,colour=인구,size=인구),data=gwangju)
> ggsave("gwangju.png",dpi=500)