Week1-2
Goal: To integrate two data frames: one recording the number and nationality of foreign tourists travelling to Taiwan, and one recording the number of Taiwanese tourists and their destinations.
Data Source:
Step1:Import data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
# Load the two Excel workbooks: foreign visitors by nationality, and
# Taiwanese travellers by destination. The paths are machine-specific.
Foreignernation <- read_excel(
  path = "C:\\Users\\User\\Desktop\\Foreigner_nation.xlsx"
)
Taiwanesedestination <- read_excel(
  path = "C:\\Users\\User\\Desktop\\Taiwanese_destination (2).xlsx"
)
# Preview the first rows of the destination table.
head(Taiwanesedestination, n = 6L)
## # A tibble: 6 x 18
## `First stop arr~ country `2002` `2003` `2004` `2005` `2006` `2007` `2008`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Asia Hong K~ 24188~ 18690~ 25597~ 28070~ 29933~ 30309~ 28511~
## 2 Asia Mainla~ 1 44 - - - - 188744
## 3 Asia Japan 797460 731330 10519~ 11804~ 12140~ 12808~ 13098~
## 4 Asia Korea,~ 120208 179893 298325 368206 396705 457095 363122
## 5 Asia Singap~ 190455 125491 160088 184926 204834 189835 167479
## 6 Asia Malays~ 186791 121267 180883 161296 181911 187788 157650
## # ... with 9 more variables: `2009` <chr>, `2010` <dbl>, `2011` <dbl>,
## # `2012` <dbl>, `2013` <dbl>, `2014` <dbl>, `2015` <dbl>, `2016` <dbl>,
## # `2017` <dbl>
# Preview the first rows of the foreign-visitor table.
head(Foreignernation, n = 6L)
## # A tibble: 6 x 17
## Area Nation `2002` `2003` `2004` `2005` `2006` `2007` `2008` `2009`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 Asia Japan 977705 659972 890444 1.13e6 1.16e6 11705~ 10905~ 10076~
## 2 Asia Korea~ 86408 94060 149182 1.85e5 1.99e5 228582 252973 170646
## 3 Asia India 17379 15817 20538 2.20e4 2.25e4 24678 23327 22126
## 4 Asia Middl~ 10224 8422 12838 1.33e4 1.27e4 13446 12356 12182
## 5 Asia Malay~ 66516 75869 105246 1.24e5 1.33e5 159839 171630 184577
## 6 Asia Singa~ 80028 66629 101379 1.47e5 1.63e5 184303 189330 180819
## # ... with 7 more variables: `2010` <dbl>, `2011` <dbl>, `2012` <dbl>,
## # `2013` <dbl>, `2014` <dbl>, `2015` <dbl>, `2016` <dbl>
Step 2: Use slice() and select() to restrict the data to the countries (rows) and years (columns) of interest.
# Keep the eight rows (nations) of interest, chosen by row position.
Foreignernation1 <- slice(Foreignernation, c(1, 2, 5:10))
# Keep the nation label plus the 2012-2016 year columns. Selecting by name
# rather than by column position keeps the right years even if the
# workbook gains or loses a column.
Foreignernation2 <- select(Foreignernation1, Nation, `2012`:`2016`)
head(Foreignernation2)
## # A tibble: 6 x 6
## Nation `2012` `2013` `2014` `2015` `2016`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan 1443009 1434346 1637264 1629193 1896456
## 2 Korea,Republic of 262340 355473 531703 662670 887412
## 3 Malaysia 367817 424053 464518 458401 500496
## 4 Singapore 297624 330293 341857 354767 371663
## 5 Indonesia 169136 176919 186558 181734 192053
## 6 Philippines 105525 101594 136998 139758 171816
# Keep the eight destination rows of interest, chosen by row position.
# The row order is deliberate (note 6 before 5 and 9, 8, 7): presumably it
# mirrors the nation order of the foreign-visitor table -- confirm for the
# last two rows, which the previews do not show.
Taiwanesedestination1 <- slice(
  Taiwanesedestination,
  c(3, 4, 6, 5, 9, 8, 7, 11)
)
# Keep the country label plus the 2012-2016 year columns. Selecting by name
# rather than by column position keeps the right years even if the
# workbook gains or loses a column.
Taiwanesedestination2 <- select(Taiwanesedestination1, country, `2012`:`2016`)
head(Taiwanesedestination2)
## # A tibble: 6 x 6
## country `2012` `2013` `2014` `2015` `2016`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan 1560300 2346007 2971846 3797879 4295240
## 2 Korea,Republic of 532729 518528 626694 500100 808420
## 3 Malaysia 193170 226919 198902 201631 245298
## 4 Singapore 241893 297588 283925 318516 319915
## 5 Indonesia 198893 166378 170301 176478 175738
## 6 Philippines 211385 129361 133583 180091 231801
Step 3: Use names() to rename the year columns so that each name shows which table it came from (FT = Foreign Tourist, TT = Taiwanese Tourist).
# Append a table-of-origin suffix to the 2012-2016 year columns so the
# names stay distinct once the tables are combined. The anchored pattern
# matches only column names that are exactly one of those five years;
# every other name (e.g. the label column) is left as it is.
names(Foreignernation2) <- sub(
  "^(201[2-6])$", "\\1_FT", names(Foreignernation2)
)
names(Taiwanesedestination2) <- sub(
  "^(201[2-6])$", "\\1_TT", names(Taiwanesedestination2)
)
head(Foreignernation2)
## # A tibble: 6 x 6
## Nation `2012_FT` `2013_FT` `2014_FT` `2015_FT` `2016_FT`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan 1443009 1434346 1637264 1629193 1896456
## 2 Korea,Republic of 262340 355473 531703 662670 887412
## 3 Malaysia 367817 424053 464518 458401 500496
## 4 Singapore 297624 330293 341857 354767 371663
## 5 Indonesia 169136 176919 186558 181734 192053
## 6 Philippines 105525 101594 136998 139758 171816
# Preview the renamed destination table.
head(Taiwanesedestination2, n = 6L)
## # A tibble: 6 x 6
## country `2012_TT` `2013_TT` `2014_TT` `2015_TT` `2016_TT`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan 1560300 2346007 2971846 3797879 4295240
## 2 Korea,Republic of 532729 518528 626694 500100 808420
## 3 Malaysia 193170 226919 198902 201631 245298
## 4 Singapore 241893 297588 283925 318516 319915
## 5 Indonesia 198893 166378 170301 176478 175738
## 6 Philippines 211385 129361 133583 180091 231801
Step 4: Combine the two data frames side by side with cbind(), then delete the duplicate “country” column by assigning NULL to it.
# cbind() pairs rows purely by position and never looks at the labels, so
# flag any row where the two tables disagree on the country name. This is
# a warning rather than an error because a spelling difference between the
# two sources does not necessarily mean the rows are misaligned.
if (!identical(Foreignernation2$Nation, Taiwanesedestination2$country)) {
  warning(
    "Nation and country labels differ; cbind() pairs rows by position only.",
    call. = FALSE
  )
}
# Place the foreign-visitor and Taiwanese-traveller columns side by side.
Comparison <- cbind(Foreignernation2, Taiwanesedestination2)
# Drop the second label column; Nation already identifies each row.
Comparison$country <- NULL
head(Comparison)
## Nation 2012_FT 2013_FT 2014_FT 2015_FT 2016_FT 2012_TT
## 1 Japan 1443009 1434346 1637264 1629193 1896456 1560300
## 2 Korea,Republic of 262340 355473 531703 662670 887412 532729
## 3 Malaysia 367817 424053 464518 458401 500496 193170
## 4 Singapore 297624 330293 341857 354767 371663 241893
## 5 Indonesia 169136 176919 186558 181734 192053 198893
## 6 Philippines 105525 101594 136998 139758 171816 211385
## 2013_TT 2014_TT 2015_TT 2016_TT
## 1 2346007 2971846 3797879 4295240
## 2 518528 626694 500100 808420
## 3 226919 198902 201631 245298
## 4 297588 283925 318516 319915
## 5 166378 170301 176478 175738
## 6 129361 133583 180091 231801