빅데이터, NOSQL, Hadoop, R 강의 자료

2015. 10. 16. 18:02 서버 프로그래밍

광주정보문화산업진흥원

강의시간 : 30시간

강의교재 : 하둡 완벽 가이드


-------------------------------------------------------------

www.oracle.com

jdk-8u60-windows-x64.exe 다운로드


http://www.cygwin.com/

setup-x86_64.exe (64-bit installation). 다운로드


https://archive.apache.org/dist/hadoop/core/hadoop-0.21.0/

hadoop-0.21.0.tar.gz  (71M) 다운로드


http://www.eclipse.org/

Eclipse IDE for Java Developers

eclipse-java-mars-1-win32-x86_64.zip 다운로드

--------------------------------------------------------------


설치 폴더 : D:\cygwin64

설치 파일 다운로드 폴더 : d:\cygwin_local_packages


환경변수-시스템 변수 편집

...........;d:\cygwin64\bin;d:\cygwin64\usr\sbin


<Hadoop 설치>

$ tar xvfz hadoop-0.21.0.tar.gz

$ ln -s hadoop-0.21.0 hadoop


<SSH 설정>

$ ssh-host-config

*** Query: Should StrictModes be used? (yes/no) 

-->no

*** Query: Should privilege separation be used? (yes/no) 

-->no

*** Query: Do you want to install sshd as a service?

*** Query: (Say "no" if it is already installed as a service) (yes/no) 

-->yes

*** Query: Enter the value of CYGWIN for the daemon: [] 

-->yes

*** Query: Do you want to use a different name? (yes/no) 

-->no

*** Query: Create new privileged user account '405-188\cyg_server' (Cygwin name: 'cyg_server')? (yes/no) 

-->yes

*** Query: Please enter the password:

*** Query: Reenter:

$ net start sshd


$ cd ~

$ ssh-keygen

$ cd .ssh

$ cat id_rsa.pub >> authorized_keys


$ cd ~/hadoop/conf

$ vi hadoop-env.sh

export JAVA_HOME=/cygdrive/d/jdk1.8.0_60


$ vi core-site.xml

<property>

<name>fs.default.name</name>

<value>hdfs://127.0.0.1:9000</value>

</property>


$ vi hdfs-site.xml

<property>

<name>dfs.replication</name>

<value>1</value>

</property>

<property>

<name>dfs.permissions</name>

<value>false</value>

</property>


$ vi mapred-site.xml

<property>

<name>mapred.job.tracker</name>

<value>127.0.0.1:9001</value>

</property>


$ cd ~/hadoop

$ ./bin/hadoop namenode -format

$ ./bin/start-all.sh

$ ./bin/stop-all.sh


./bin/hadoop-daemon.sh start namenode

./bin/hadoop-daemon.sh start secondarynamenode

./bin/hadoop-daemon.sh start datanode

./bin/hadoop-daemon.sh start jobtracker

./bin/hadoop-daemon.sh start tasktracker


$ vi input.txt

$ ./bin/hadoop fs -put input.txt input.txt

$ ./bin/hadoop fs -ls


$ ./bin/hadoop jar gitct-wordcount.jar kr.gitct.wordcount.WordCount input.txt wordcount_output

$ ./bin/hadoop fs -cat wordcount_output/part-r-00000

http://127.0.0.1:50030/jobtracker.jsp

------------------------------

$ ./bin/hadoop fs -put airline airline

$ ./bin/hadoop fs -ls airline

$ ./bin/hadoop jar gitct-delaycount.jar kr.gitct.delaycount.DelayCount airline delaycount_output

$ ./bin/hadoop fs -cat delaycount_output/part-r-00000


-------------------------------------------------------------------------------------------


https://www.r-project.org/

1)http://cran.nexr.com/ 접속

2)Download R for Windows 클릭

3)install R for the first time. 클릭

4)Download R 3.2.2 for Windows 클릭

----------------------------

1)www.db-expert.net

2)"R까기 책관련 예제 데이터모음" 클릭

3)첨부파일 (스크립트 및 원본 데이터 모음) 다운로드

----------------------------------

> setwd("d:/temp/r_temp")

> install.packages("KoNLP")

> install.packages("wordcloud")

> library(KoNLP)

> library(wordcloud)

> useSejongDic()

> mergeUserDic(data.frame("주상절리","ncn"))

> mergeUserDic(data.frame("협재해변","ncn"))

> mergeUserDic(data.frame("성산일출봉","ncn"))

mergeUserDic(data.frame("섭지코지","ncn"))

mergeUserDic(data.frame("천지연폭포","ncn"))

mergeUserDic(data.frame("우도","ncn"))

mergeUserDic(data.frame("산방산","ncn"))

mergeUserDic(data.frame("중문관광단지","ncn"))

mergeUserDic(data.frame("잠수함","ncn"))

mergeUserDic(data.frame("러브랜드","ncn"))

mergeUserDic(data.frame("용두암","ncn"))

mergeUserDic(data.frame("신비의도로","ncn"))

mergeUserDic(data.frame("한라산","ncn"))

mergeUserDic(data.frame("오설록","ncn"))

mergeUserDic(data.frame("유리의성","ncn"))

mergeUserDic(data.frame("한림공원","ncn"))

mergeUserDic(data.frame("용머리해안","ncn"))

mergeUserDic(data.frame("해수욕장","ncn"))

mergeUserDic(data.frame("중문","ncn"))

mergeUserDic(data.frame("제주민속촌","ncn"))

mergeUserDic(data.frame("외돌개","ncn"))

mergeUserDic(data.frame("에코랜드","ncn"))

> txt <- readLines("jeju.txt")

> place <- sapply(txt,extractNoun,USE.NAMES=F)

> head(unlist(place),30)

> c <- unlist(place)

> place <- Filter(function(x) { nchar(x) >= 2 }, c)

place <- gsub("제주","",place)

place <- gsub("통운","",place)

place <- gsub("전국","",place)

place <- gsub("체인","",place)

place <- gsub("업체","",place)

place <- gsub("질문","",place)

place <- gsub("가격","",place)

place <- gsub("무난","",place)

place <- gsub("여행","",place)

place <- gsub("검색","",place)

place <- gsub("코스","",place)

place <- gsub("숙소","",place)

place <- gsub("준비","",place)

place <- gsub("다운로드","",place)

place <- gsub("조회수","",place)

place <- gsub("추천수","",place)

place <- gsub("추천","",place)

place <- gsub("답변수","",place)

place <- gsub("첫째날","",place)

place <- gsub("첫쨋날","",place)

place <- gsub("좋구요","",place)

place <- gsub("이런거","",place)

place <- gsub("둘째날","",place)

place <- gsub("셋째날","",place)

place <- gsub("세쨋날","",place)

place <- gsub("토요일","",place)

place <- gsub("일요일","",place)

place <- gsub("시간","",place)

place <- gsub("항공","",place)

place <- gsub("관광지","",place)

place <- gsub("입장료","",place)

place <- gsub("저가","",place)

place <- gsub("항공사","",place)

place <- gsub("도움","",place)

place <- gsub("대략","",place)

place <- gsub("요금","",place)

place <- gsub("\\-","",place)

place <- gsub("이용","",place)

place <- gsub("공항","",place)

place <- gsub("해안","",place)

place <- gsub("드라이브","",place)

place <- gsub("경유","",place)

place <- gsub("바다","",place)

place <- gsub("전망","",place)

place <- gsub("하루","",place)

place <- gsub("렌트카","",place)

place <- gsub("하시","",place)

place <- gsub("예약","",place)

place <- gsub("사진","",place)

place <- gsub("위치","",place)

place <- gsub("필요","",place)

place <- gsub("할인","",place)

place <- gsub("출발","",place)

place <- gsub("가능","",place)

place <- gsub("소요","",place)

place <- gsub("일정","",place)

place <- gsub("하게","",place)

place <- gsub("근처","",place)

place <- gsub("중간","",place)

place <- gsub("다양","",place)

place <- gsub("첫날","",place)

place <- gsub("도착","",place)

place <- gsub("용머","",place)

place <- gsub("리","",place)

place <- gsub("바위","",place)

place <- gsub("유명","",place)

place <- gsub("정도","",place)

place <- gsub("이동","",place)

place <- gsub("무료","",place)

place <- gsub("용머","",place)

place <- gsub("체험","",place)

place <- gsub("둘째","",place)

place <- gsub(" ","",place)

place <- gsub("\\d+","",place)

> write(unlist(place),"jeju_2.txt")

> rev <- read.table("jeju_2.txt")

> nrow(rev)

[1] 1539

> wordcount <- table(rev)

> head(sort(wordcount,decreasing=T),30)

--------------------------------

> library(RColorBrewer)

> palete <- brewer.pal(9,"Set1")

> wordcloud(names(wordcount),freq=wordcount,scale=c(5,1),

+ rot.per=0.25,min.freq=1,random.order=F,random.color=T,colors=palete)

> savePlot("jeju.png",type="png")

---------------------------------

> a <- head(sort(wordcount,decreasing=T),10)

> pie(a)

> pie(a,col=rainbow(10),radius=1)

> pct <- round(a/sum(a) * 100,1)

> lab <- paste(names(a),"\n",pct,"%")

> pie(a,main="제주도 추천 코스",col=rainbow(10),

+ cex=0.8,labels=lab)

> lab2 <- paste(names(a),"\n",pct,"% (",a,"건)")

> pie(a,main="제주도 추천 코스",col=rainbow(10),

+ cex=0.8,labels=lab2)

> par(new=T)

> pie(a,radius=0.6,col="white",labels=NA,border=NA)

> savePlot("donut_1.png",type="png")

-----------------------------------

> b <- head(sort(wordcount,decreasing=T),10)

> pct <- round(b/sum(b)*100,1)

> bp <- barplot(b, main="제주도 추천 여행지 TOP 10",

+ col=rainbow(10),cex.names=0.7,las=2,ylim=c(0,25))

> text(x=bp,y=b*1.05,labels=paste("(",pct,"%",")"),col="black",cex=0.7)

> text(x=bp,y=b*0.95,labels=paste(b,"건"),col="black",cex=0.7)


> bp <- barplot(b, main="제주 여행 코스",col=rainbow(10),

+ xlim=c(0,25),cex.names=0.7,las=1,horiz=T)

> text(y=bp,x=b*0.9,labels=paste(b,"건"),col="black",cex=0.7)

> text(y=bp,x=b*1.15,labels=paste("(",pct,"%",")"),col="black",cex=0.7)

--------------------------------------------

> txt <- readLines("propose.txt")

> pro <- sapply(txt,extractNoun,USE.NAMES=F)

> c <- unlist(pro)

> pro <- Filter(function(x) { nchar(x) >= 2 },c)

pro <- gsub("프로포즈","",pro)

pro <- gsub("propose","",pro)

pro <- gsub("선물","",pro)

pro <- gsub("조회수","",pro)

pro <- gsub("조회","",pro)

pro <- gsub("\\.","",pro)

pro <- gsub("사회","",pro)

pro <- gsub("사람","",pro)

pro <- gsub("생각","",pro)

pro <- gsub("준비","",pro)

pro <- gsub("연애","",pro)

pro <- gsub("패션","",pro)

pro <- gsub("방법","",pro)

pro <- gsub("추천수","",pro)

pro <- gsub("\\n","",pro)

pro <- gsub("\\d+","",pro)

pro <- gsub("남자","",pro)

pro <- gsub("가족","",pro)

pro <- gsub("친구","",pro)

pro <- gsub("답변","",pro)

pro <- gsub("추천","",pro)

pro <- gsub("특별","",pro)

pro <- gsub("생활","",pro)

pro <- gsub("결혼","",pro)

pro <- gsub("하시","",pro)

pro <- gsub("조언","",pro)

pro <- gsub("그룹","",pro)

pro <- gsub("하게","",pro)

pro <- gsub("여자","",pro)

pro <- gsub("장소","",pro)

pro <- gsub("감동","",pro)

pro <- gsub("커플","",pro)

pro <- gsub("행사","",pro)

pro <- gsub("성공","",pro)

pro <- gsub("시간","",pro)

pro <- gsub("감사","",pro)

pro <- gsub("기억","",pro)

pro <- gsub("누나","",pro)

pro <- gsub("문화","",pro)

pro <- gsub("정치","",pro)

pro <- gsub("질문","",pro)

> head(unlist(pro),20)

> write(unlist(pro),"pro_3.txt")

> rev <- read.table("pro_3.txt")

> nrow(rev)

[1] 839

> wordcount <- table(rev)

> head(sort(wordcount,decreasing=T),20)

> recommand <- head(sort(wordcount,decreasing=T),10)

> barplot(recommand,main="프로포즈 선물 TOP 10",col=rainbow(10),

+ space=0.8,ylim=c(0,60),cex.names=0.7,las=2)

-----------------------------

> plot(recommand,xlab="",ylab="",ylim=c(0,60),axes=FALSE,type="o",

+ col="red",main="프로포즈 선물 TOP 10",lwd=2)

> axis(1,at=1:10,lab=names(recommand),las=2)

> axis(2,las=1)

> abline(h=seq(0,60,5),v=seq(1,10,1),col="gray",lty=2)

-----------------------------

> install.packages("plotrix")

> library(plotrix)

> th_pct <- round(recommand/sum(recommand)*100,1)

> th_names <- names(recommand)

> th_labels <- paste(th_names,"\n","(",th_pct,")")

> pie3D(recommand,main="프로포즈 선물 TOP 10",col=rainbow(10),

+ cex=0.7,labels=th_labels,explode=0.05)

-------------------------------

> install.packages("ggmap")

> library(ggmap)

> loc <- read.csv("지역별장애인도서관정보.csv",header=T)

> kor <- get_map("seoul",zoom=11,maptype="roadmap")

> kor.map <- ggmap(kor)+geom_point(data=loc,aes(x=LON,y=LAT),

+ size=5,alpha=0.7)

> kor.map + geom_text(data=loc,aes(x=LON,y=LAT+0.01,label=자치구명),size=3)

--------------------------------

> library(ggmap)

> library(grid)

> pop <- read.csv("지역별인구현황_2014_4월기준.csv",header=T)

> lon <- pop$LON

> lat <- pop$LAT

> data <- pop$총인구수

> df <- data.frame(lon,lat,data)

> map1 <- get_map("Jeonju",zoom=7,maptype='roadmap')

> map1 <- ggmap(map1)

> map1 + geom_point(aes(x=lon,y=lat,colour=data,size=data),data=df)

------------------------------------------

> gwangju <- read.csv("gwangju.txt",header=T)

> gwangju

  자치구   인구      LAT      LON

1   동구 101454 35.14614 126.9231

2   서구 309732 35.15197 126.8903

3   남구 222105 35.13297 126.9024

4   북구 452610 35.17406 126.9119

5 광산구 408402 35.13952 126.7937

> map5 <- get_map("Gwangju",zoom=12,maptype='roadmap')

> map5 <- ggmap(map5)

> map5 + geom_point(aes(x=LON,y=LAT,colour=인구,size=인구),data=gwangju)

> ggsave("gwangju.png",dpi=500)



gwangju.txt