introduction to the tidyverse - s3.amazonaws.com · mutate(gdp = gdppercap * pop) # a tibble: 1,704...

25
DataCamp Introduction to the Tidyverse The gapminder dataset INTRODUCTION TO THE TIDYVERSE David Robinson Data Scientist, Stack Overflow

Upload: lythu

Post on 12-Jan-2019

218 views

Category:

Documents


0 download

TRANSCRIPT

DataCamp Introduction to the Tidyverse

The gapminder dataset

INTRODUCTION TO THE TIDYVERSE

David Robinson Data Scientist, Stack Overflow

DataCamp Introduction to the Tidyverse

Tidyverse

DataCamp Introduction to the Tidyverse

Gapminder

DataCamp Introduction to the Tidyverse

DataCamp Introduction to the Tidyverse

Loading packages

library(gapminder)

library(dplyr)

DataCamp Introduction to the Tidyverse

The gapminder dataset

gapminder

# A tibble: 1,704 x 6
       country continent  year lifeExp      pop gdpPercap
        <fctr>    <fctr> <int>   <dbl>    <dbl>     <dbl>
 1 Afghanistan      Asia  1952  28.801  8425333  779.4453
 2 Afghanistan      Asia  1957  30.332  9240934  820.8530
 3 Afghanistan      Asia  1962  31.997 10267083  853.1007
 4 Afghanistan      Asia  1967  34.020 11537966  836.1971
 5 Afghanistan      Asia  1972  36.088 13079460  739.9811
 6 Afghanistan      Asia  1977  38.438 14880372  786.1134
 7 Afghanistan      Asia  1982  39.854 12881816  978.0114
 8 Afghanistan      Asia  1987  40.822 13867957  852.3959
 9 Afghanistan      Asia  1992  41.674 16317921  649.3414
10 Afghanistan      Asia  1997  41.763 22227415  635.3414
# ... with 1,694 more rows

DataCamp Introduction to the Tidyverse

Let's practice!

INTRODUCTION TO THE TIDYVERSE

DataCamp Introduction to the Tidyverse

The filter verb

INTRODUCTION TO THE TIDYVERSE

David Robinson Data Scientist, Stack Overflow

DataCamp Introduction to the Tidyverse

The filter verb

DataCamp Introduction to the Tidyverse

Filtering for one year

gapminder %>%
  filter(year == 2007)

# A tibble: 142 x 6
       country continent  year lifeExp       pop  gdpPercap
        <fctr>    <fctr> <int>   <dbl>     <dbl>      <dbl>
 1 Afghanistan      Asia  2007  43.828  31889923   974.5803
 2     Albania    Europe  2007  76.423   3600523  5937.0295
 3     Algeria    Africa  2007  72.301  33333216  6223.3675
 4      Angola    Africa  2007  42.731  12420476  4797.2313
 5   Argentina  Americas  2007  75.320  40301927 12779.3796
 6   Australia   Oceania  2007  81.235  20434176 34435.3674
 7     Austria    Europe  2007  79.829   8199783 36126.4927
 8     Bahrain      Asia  2007  75.635    708573 29796.0483
 9  Bangladesh      Asia  2007  64.062 150448339  1391.2538
10     Belgium    Europe  2007  79.441  10392226 33692.6051
# ... with 132 more rows

DataCamp Introduction to the Tidyverse

Filtering for one country

gapminder %>%
  filter(country == "United States")

# A tibble: 12 x 6
         country continent  year lifeExp       pop gdpPercap
          <fctr>    <fctr> <int>   <dbl>     <dbl>     <dbl>
 1 United States  Americas  1952  68.440 157553000  13990.48
 2 United States  Americas  1957  69.490 171984000  14847.13
 3 United States  Americas  1962  70.210 186538000  16173.15
 4 United States  Americas  1967  70.760 198712000  19530.37
 5 United States  Americas  1972  71.340 209896000  21806.04
 6 United States  Americas  1977  73.380 220239000  24072.63
 7 United States  Americas  1982  74.650 232187835  25009.56
 8 United States  Americas  1987  75.020 242803533  29884.35
 9 United States  Americas  1992  76.090 256894189  32003.93
10 United States  Americas  1997  76.810 272911760  35767.43
11 United States  Americas  2002  77.310 287675526  39097.10
12 United States  Americas  2007  78.242 301139947  42951.65

DataCamp Introduction to the Tidyverse

Filtering for two variables

gapminder %>%
  filter(year == 2007, country == "United States")

# A tibble: 1 x 6
        country continent  year lifeExp       pop gdpPercap
         <fctr>    <fctr> <int>   <dbl>     <dbl>     <dbl>
1 United States  Americas  2007  78.242 301139947  42951.65

DataCamp Introduction to the Tidyverse

Let's practice!

INTRODUCTION TO THE TIDYVERSE

DataCamp Introduction to the Tidyverse

The arrange verb

INTRODUCTION TO THE TIDYVERSE

David Robinson Data Scientist, Stack Overflow

DataCamp Introduction to the Tidyverse

The arrange verb

DataCamp Introduction to the Tidyverse

Sorting with arrange

gapminder %>%
  arrange(gdpPercap)

# A tibble: 1,704 x 6
            country continent  year lifeExp      pop gdpPercap
             <fctr>    <fctr> <int>   <dbl>    <dbl>     <dbl>
 1 Congo, Dem. Rep.    Africa  2002  44.966 55379852  241.1659
 2 Congo, Dem. Rep.    Africa  2007  46.462 64606759  277.5519
 3          Lesotho    Africa  1952  42.138   748747  298.8462
 4    Guinea-Bissau    Africa  1952  32.500   580653  299.8503
 5 Congo, Dem. Rep.    Africa  1997  42.587 47798986  312.1884
 6          Eritrea    Africa  1952  35.928  1438760  328.9406
 7          Myanmar      Asia  1952  36.319 20092996  331.0000
 8          Lesotho    Africa  1957  45.047   813338  335.9971
 9          Burundi    Africa  1952  39.031  2445618  339.2965
10          Eritrea    Africa  1957  38.047  1542611  344.1619
# ... with 1,694 more rows

DataCamp Introduction to the Tidyverse

Sorting in descending order

gapminder %>%
  arrange(desc(gdpPercap))

# A tibble: 1,704 x 6
     country continent  year lifeExp     pop gdpPercap
      <fctr>    <fctr> <int>   <dbl>   <dbl>     <dbl>
 1    Kuwait      Asia  1957  58.033  212846 113523.13
 2    Kuwait      Asia  1972  67.712  841934 109347.87
 3    Kuwait      Asia  1952  55.565  160000 108382.35
 4    Kuwait      Asia  1962  60.470  358266  95458.11
 5    Kuwait      Asia  1967  64.624  575003  80894.88
 6    Kuwait      Asia  1977  69.343 1140357  59265.48
 7    Norway    Europe  2007  80.196 4627926  49357.19
 8    Kuwait      Asia  2007  77.588 2505559  47306.99
 9 Singapore      Asia  2007  79.972 4553009  47143.18
10    Norway    Europe  2002  79.050 4535591  44683.98
# ... with 1,694 more rows

DataCamp Introduction to the Tidyverse

Filtering then arranging

gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(gdpPercap))

# A tibble: 142 x 6
            country continent  year lifeExp       pop gdpPercap
             <fctr>    <fctr> <int>   <dbl>     <dbl>     <dbl>
 1           Norway    Europe  2007  80.196   4627926  49357.19
 2           Kuwait      Asia  2007  77.588   2505559  47306.99
 3        Singapore      Asia  2007  79.972   4553009  47143.18
 4    United States  Americas  2007  78.242 301139947  42951.65
 5          Ireland    Europe  2007  78.885   4109086  40676.00
 6 Hong Kong, China      Asia  2007  82.208   6980412  39724.98
 7      Switzerland    Europe  2007  81.701   7554661  37506.42
 8      Netherlands    Europe  2007  79.762  16570613  36797.93
 9           Canada  Americas  2007  80.653  33390141  36319.24
10          Iceland    Europe  2007  81.757    301931  36180.79
# ... with 132 more rows

DataCamp Introduction to the Tidyverse

Let's practice!

INTRODUCTION TO THE TIDYVERSE

DataCamp Introduction to the Tidyverse

The mutate verb

INTRODUCTION TO THE TIDYVERSE

David Robinson Data Scientist, Stack Overflow

DataCamp Introduction to the Tidyverse

The mutate verb

DataCamp Introduction to the Tidyverse

Using mutate to change a variable

gapminder %>%
  mutate(pop = pop / 1000000)

# A tibble: 1,704 x 6
       country continent  year lifeExp       pop gdpPercap
        <fctr>    <fctr> <int>   <dbl>     <dbl>     <dbl>
 1 Afghanistan      Asia  1952  28.801  8.425333  779.4453
 2 Afghanistan      Asia  1957  30.332  9.240934  820.8530
 3 Afghanistan      Asia  1962  31.997 10.267083  853.1007
 4 Afghanistan      Asia  1967  34.020 11.537966  836.1971
 5 Afghanistan      Asia  1972  36.088 13.079460  739.9811
 6 Afghanistan      Asia  1977  38.438 14.880372  786.1134
 7 Afghanistan      Asia  1982  39.854 12.881816  978.0114
 8 Afghanistan      Asia  1987  40.822 13.867957  852.3959
 9 Afghanistan      Asia  1992  41.674 16.317921  649.3414
10 Afghanistan      Asia  1997  41.763 22.227415  635.3414
# ... with 1,694 more rows

DataCamp Introduction to the Tidyverse

Using mutate to add a new variable

gapminder %>%
  mutate(gdp = gdpPercap * pop)

# A tibble: 1,704 x 7
       country continent  year lifeExp      pop gdpPercap         gdp
        <fctr>    <fctr> <int>   <dbl>    <dbl>     <dbl>       <dbl>
 1 Afghanistan      Asia  1952  28.801  8425333  779.4453  6567086330
 2 Afghanistan      Asia  1957  30.332  9240934  820.8530  7585448670
 3 Afghanistan      Asia  1962  31.997 10267083  853.1007  8758855797
 4 Afghanistan      Asia  1967  34.020 11537966  836.1971  9648014150
 5 Afghanistan      Asia  1972  36.088 13079460  739.9811  9678553274
 6 Afghanistan      Asia  1977  38.438 14880372  786.1134 11697659231
 7 Afghanistan      Asia  1982  39.854 12881816  978.0114 12598563401
 8 Afghanistan      Asia  1987  40.822 13867957  852.3959 11820990309
 9 Afghanistan      Asia  1992  41.674 16317921  649.3414 10595901589
10 Afghanistan      Asia  1997  41.763 22227415  635.3414 14121995875
# ... with 1,694 more rows

DataCamp Introduction to the Tidyverse

Combining verbs

gapminder %>%
  mutate(gdp = gdpPercap * pop) %>%
  filter(year == 2007) %>%
  arrange(desc(gdp))

# A tibble: 142 x 7
          country continent  year lifeExp        pop gdpPercap          gdp
           <fctr>    <fctr> <int>   <dbl>      <dbl>     <dbl>        <dbl>
 1  United States  Americas  2007  78.242  301139947 42951.653 1.293446e+13
 2          China      Asia  2007  72.961 1318683096  4959.115 6.539501e+12
 3          Japan      Asia  2007  82.603  127467972 31656.068 4.035135e+12
 4          India      Asia  2007  64.698 1110396331  2452.210 2.722925e+12
 5        Germany    Europe  2007  79.406   82400996 32170.374 2.650871e+12
 6 United Kingdom    Europe  2007  79.425   60776238 33203.261 2.017969e+12
 7         France    Europe  2007  80.657   61083916 30470.017 1.861228e+12
 8         Brazil  Americas  2007  72.390  190010647  9065.801 1.722599e+12
 9          Italy    Europe  2007  80.546   58147733 28569.720 1.661264e+12
10         Mexico  Americas  2007  76.195  108700891 11977.575 1.301973e+12
# ... with 132 more rows

DataCamp Introduction to the Tidyverse

Let's practice!

INTRODUCTION TO THE TIDYVERSE