Some data in this lab comes from the OCS “Exploring CO2 emissions across time” activity (https://www.opencasestudies.org/ocs-bp-co2-emissions/. This dataset is available in the dasehr package.

Additional data about climate change disasters can be found at “https://daseh.org/data/Yearly_CC_Disasters.csv”.

library(tidyverse)
library(dasehr)

Part 1

1.1

Open the yearly_co2_emissions dataset from the dasehr package and assign it to an object called co2. (You can also use read_csv() from the readr package and download the dataset directly from the daseh.org website: “https://daseh.org/data/Yearly_CO2_Emissions_1000_tonnes.csv”)

  • Remember to use read_csv() from the readr package.
  • Do NOT use read.csv().
co2 <- yearly_co2_emissions

co2 <- read_csv("https://daseh.org/data/Yearly_CO2_Emissions_1000_tonnes.csv")
## Rows: 192 Columns: 265
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): country
## dbl (264): 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1.2

Look at the column names using colnames - do you notice any patterns?

colnames(co2)
##   [1] "country" "1751"    "1752"    "1753"    "1754"    "1755"    "1756"   
##   [8] "1757"    "1758"    "1759"    "1760"    "1761"    "1762"    "1763"   
##  [15] "1764"    "1765"    "1766"    "1767"    "1768"    "1769"    "1770"   
##  [22] "1771"    "1772"    "1773"    "1774"    "1775"    "1776"    "1777"   
##  [29] "1778"    "1779"    "1780"    "1781"    "1782"    "1783"    "1784"   
##  [36] "1785"    "1786"    "1787"    "1788"    "1789"    "1790"    "1791"   
##  [43] "1792"    "1793"    "1794"    "1795"    "1796"    "1797"    "1798"   
##  [50] "1799"    "1800"    "1801"    "1802"    "1803"    "1804"    "1805"   
##  [57] "1806"    "1807"    "1808"    "1809"    "1810"    "1811"    "1812"   
##  [64] "1813"    "1814"    "1815"    "1816"    "1817"    "1818"    "1819"   
##  [71] "1820"    "1821"    "1822"    "1823"    "1824"    "1825"    "1826"   
##  [78] "1827"    "1828"    "1829"    "1830"    "1831"    "1832"    "1833"   
##  [85] "1834"    "1835"    "1836"    "1837"    "1838"    "1839"    "1840"   
##  [92] "1841"    "1842"    "1843"    "1844"    "1845"    "1846"    "1847"   
##  [99] "1848"    "1849"    "1850"    "1851"    "1852"    "1853"    "1854"   
## [106] "1855"    "1856"    "1857"    "1858"    "1859"    "1860"    "1861"   
## [113] "1862"    "1863"    "1864"    "1865"    "1866"    "1867"    "1868"   
## [120] "1869"    "1870"    "1871"    "1872"    "1873"    "1874"    "1875"   
## [127] "1876"    "1877"    "1878"    "1879"    "1880"    "1881"    "1882"   
## [134] "1883"    "1884"    "1885"    "1886"    "1887"    "1888"    "1889"   
## [141] "1890"    "1891"    "1892"    "1893"    "1894"    "1895"    "1896"   
## [148] "1897"    "1898"    "1899"    "1900"    "1901"    "1902"    "1903"   
## [155] "1904"    "1905"    "1906"    "1907"    "1908"    "1909"    "1910"   
## [162] "1911"    "1912"    "1913"    "1914"    "1915"    "1916"    "1917"   
## [169] "1918"    "1919"    "1920"    "1921"    "1922"    "1923"    "1924"   
## [176] "1925"    "1926"    "1927"    "1928"    "1929"    "1930"    "1931"   
## [183] "1932"    "1933"    "1934"    "1935"    "1936"    "1937"    "1938"   
## [190] "1939"    "1940"    "1941"    "1942"    "1943"    "1944"    "1945"   
## [197] "1946"    "1947"    "1948"    "1949"    "1950"    "1951"    "1952"   
## [204] "1953"    "1954"    "1955"    "1956"    "1957"    "1958"    "1959"   
## [211] "1960"    "1961"    "1962"    "1963"    "1964"    "1965"    "1966"   
## [218] "1967"    "1968"    "1969"    "1970"    "1971"    "1972"    "1973"   
## [225] "1974"    "1975"    "1976"    "1977"    "1978"    "1979"    "1980"   
## [232] "1981"    "1982"    "1983"    "1984"    "1985"    "1986"    "1987"   
## [239] "1988"    "1989"    "1990"    "1991"    "1992"    "1993"    "1994"   
## [246] "1995"    "1996"    "1997"    "1998"    "1999"    "2000"    "2001"   
## [253] "2002"    "2003"    "2004"    "2005"    "2006"    "2007"    "2008"   
## [260] "2009"    "2010"    "2011"    "2012"    "2013"    "2014"
# Most column names are years

1.3

Let’s rename the column “2011” in “co2” to “CO2_2011” using rename. Repeat this for the years 2012, 2013, and 2014. Make sure to reassign to co2 here and in subsequent steps.

# General format
new_data <- old_data %>% rename(newname = oldname)
co2 <- co2 %>% rename(
  CO2_2011 = `2011`,
  CO2_2012 = `2012`,
  CO2_2013 = `2013`,
  CO2_2014 = `2014`
)

1.4

Select only the columns “country”, and those that start with “CO2_”. Use select and starts_with("CO2_").

# General format
new_data <- old_data %>% select(colname1, colname2, ...)
co2 <- co2 %>% select(country, starts_with("CO2_"))

1.5

Create a new dataset “co2_long” that does pivot_longer() on all columns except “country”. Remember that !country means all columns except “country”.

# General format
new_data <- old_data %>% pivot_longer(cols = colname(s))
co2_long <- co2 %>% pivot_longer(cols = !country)

1.6

Using co2_long, filter the “country” column so it only includes values from Indonesia and Canada. Hint: use filter and %in%.

# General format
new_data <- old_data %>% filter(colname %in% c(...))
co2_long <- co2_long %>%
  filter(country %in% c("Indonesia", "Canada"))

1.7

Use pivot_wider to reshape “co2_long”. Use “county” for the names_from argument. Use “value” for the values_from argument. Call this new data co2_wide. Look at the data. How do these years compare to one another?

# General format
new_data <- old_data %>% pivot_wider(names_from = column1, values_from = column2)
co2_wide <- co2_long %>%
  pivot_wider(
    names_from = country,
    values_from = value
  )
co2_wide
## # A tibble: 4 × 3
##   name     Canada Indonesia
##   <chr>     <dbl>     <dbl>
## 1 CO2_2011 537000    604000
## 2 CO2_2012 517000    637000
## 3 CO2_2013 517000    490000
## 4 CO2_2014 537000    464000

Practice on Your Own!

P.1

Take the code from Questions 1.1 and 1.3-1.7. Chain all of this code together using the pipe %>%. Call your data co2_compare.

co2_compare <-
  yearly_co2_emissions %>%
  rename(
    CO2_2011 = `2011`,
    CO2_2012 = `2012`,
    CO2_2013 = `2013`,
    CO2_2014 = `2014`
  ) %>% select(country, starts_with("CO2_")) %>%
  pivot_longer(cols = !country) %>%
  filter(country %in% c("Indonesia", "Canada")) %>%
  pivot_wider(names_from = country, values_from = value)

co2_compare
## # A tibble: 4 × 3
##   name     Canada Indonesia
##   <chr>     <dbl>     <dbl>
## 1 CO2_2011 537000    604000
## 2 CO2_2012 517000    637000
## 3 CO2_2013 517000    490000
## 4 CO2_2014 537000    464000

P.2

Modify the code from Question P.1:

  • Choose 4 different years to examine
  • Select different countries to compare
  • Call your data co2_compare2
co2_compare2 <-
  yearly_co2_emissions %>%
  rename(
    CO2_1950 = `1950`,
    CO2_1960 = `1960`,
    CO2_1970 = `1970`,
    CO2_1980 = `1980`
  ) %>% select(country, starts_with("CO2_")) %>%
  pivot_longer(cols = !country) %>%
  filter(country %in% c("Brazil", "Mexico")) %>%
  pivot_wider(names_from = country, values_from = value)

co2_compare2
## # A tibble: 4 × 3
##   name     Brazil Mexico
##   <chr>     <dbl>  <dbl>
## 1 CO2_1950  19700  30500
## 2 CO2_1960  46900  63100
## 3 CO2_1970  93800 114000
## 4 CO2_1980 187000 268000

Part 2

2.1

Open the Yearly_CC_Disasters dataset using the url below. Save the dataset as an object called “cc”. rename the column “Country” to “country”.

https://daseh.org/data/Yearly_CC_Disasters.csv

cc <- read_csv("https://daseh.org/data/Yearly_CC_Disasters.csv") %>% 
  rename(country = Country)
## Rows: 970 Columns: 53
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): Country, ISO2, ISO3, Indicator, Unit, Source, CTS Code, CTS Name, ...
## dbl (44): ObjectId, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 19...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2.2

Join the datasets “co2” and “cc” by “country”. Keep all rows, even if there is no match. How many rows are there?

# General format
full_join(data1, data2, by = "")
full_join(co2, cc, by = "country")
## # A tibble: 1,032 × 57
##    country    CO2_2011 CO2_2012 CO2_2013 CO2_2014 ObjectId ISO2  ISO3  Indicator
##    <chr>         <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <chr> <chr> <chr>    
##  1 Afghanist…    12200    10800    10000     9810       NA <NA>  <NA>  <NA>     
##  2 Albania        5240     4910     5060     5720        8 AL    ALB   Climate …
##  3 Albania        5240     4910     5060     5720        9 AL    ALB   Climate …
##  4 Albania        5240     4910     5060     5720       10 AL    ALB   Climate …
##  5 Albania        5240     4910     5060     5720       11 AL    ALB   Climate …
##  6 Albania        5240     4910     5060     5720       12 AL    ALB   Climate …
##  7 Albania        5240     4910     5060     5720       13 AL    ALB   Climate …
##  8 Albania        5240     4910     5060     5720       14 AL    ALB   Climate …
##  9 Algeria      121000   130000   134000   145000       15 DZ    DZA   Climate …
## 10 Algeria      121000   130000   134000   145000       16 DZ    DZA   Climate …
## # ℹ 1,022 more rows
## # ℹ 48 more variables: Unit <chr>, Source <chr>, `CTS Code` <chr>,
## #   `CTS Name` <chr>, `CTS Full Descriptor` <chr>, `1980` <dbl>, `1981` <dbl>,
## #   `1982` <dbl>, `1983` <dbl>, `1984` <dbl>, `1985` <dbl>, `1986` <dbl>,
## #   `1987` <dbl>, `1988` <dbl>, `1989` <dbl>, `1990` <dbl>, `1991` <dbl>,
## #   `1992` <dbl>, `1993` <dbl>, `1994` <dbl>, `1995` <dbl>, `1996` <dbl>,
## #   `1997` <dbl>, `1998` <dbl>, `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, …
# 1,032 rows

2.3

Join the datasets “co2” and “cc” by “country”. Keep only rows that match both datasets. How many rows are there?

# General format
inner_join(data1, data2, by = "")
inner_join(co2, cc, by = "country")
## # A tibble: 628 × 57
##    country CO2_2011 CO2_2012 CO2_2013 CO2_2014 ObjectId ISO2  ISO3  Indicator   
##    <chr>      <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <chr> <chr> <chr>       
##  1 Albania     5240     4910     5060     5720        8 AL    ALB   Climate rel…
##  2 Albania     5240     4910     5060     5720        9 AL    ALB   Climate rel…
##  3 Albania     5240     4910     5060     5720       10 AL    ALB   Climate rel…
##  4 Albania     5240     4910     5060     5720       11 AL    ALB   Climate rel…
##  5 Albania     5240     4910     5060     5720       12 AL    ALB   Climate rel…
##  6 Albania     5240     4910     5060     5720       13 AL    ALB   Climate rel…
##  7 Albania     5240     4910     5060     5720       14 AL    ALB   Climate rel…
##  8 Algeria   121000   130000   134000   145000       15 DZ    DZA   Climate rel…
##  9 Algeria   121000   130000   134000   145000       16 DZ    DZA   Climate rel…
## 10 Algeria   121000   130000   134000   145000       17 DZ    DZA   Climate rel…
## # ℹ 618 more rows
## # ℹ 48 more variables: Unit <chr>, Source <chr>, `CTS Code` <chr>,
## #   `CTS Name` <chr>, `CTS Full Descriptor` <chr>, `1980` <dbl>, `1981` <dbl>,
## #   `1982` <dbl>, `1983` <dbl>, `1984` <dbl>, `1985` <dbl>, `1986` <dbl>,
## #   `1987` <dbl>, `1988` <dbl>, `1989` <dbl>, `1990` <dbl>, `1991` <dbl>,
## #   `1992` <dbl>, `1993` <dbl>, `1994` <dbl>, `1995` <dbl>, `1996` <dbl>,
## #   `1997` <dbl>, `1998` <dbl>, `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, …
# 628 rows

2.4

What countries are present in “co2” that are not present in “cc”? Use anti_join. What countries are present in “cc” that are not present in “co2”?

# General format
anti_join(data1, data2, by = "") %>% select(index)
anti_join(co2, cc, by = "country") %>% select(country)
## # A tibble: 62 × 1
##    country                 
##    <chr>                   
##  1 Afghanistan             
##  2 Andorra                 
##  3 Armenia                 
##  4 Azerbaijan              
##  5 Bahamas                 
##  6 Bahrain                 
##  7 Belarus                 
##  8 Brunei                  
##  9 Cape Verde              
## 10 Central African Republic
## # ℹ 52 more rows
anti_join(cc, co2, by = "country") %>% select(country) %>% distinct()
## # A tibble: 85 × 1
##    country                     
##    <chr>                       
##  1 Afghanistan, Islamic Rep. of
##  2 American Samoa              
##  3 Anguilla                    
##  4 Armenia, Rep. of            
##  5 Azerbaijan, Rep. of         
##  6 Azores Island               
##  7 Bahamas, The                
##  8 Belarus, Rep. of            
##  9 Bermuda                     
## 10 British Virgin Islands      
## # ℹ 75 more rows

Practice on Your Own!

P.3

Take the code from 2.2 and save the output as an object “co2_cc”. Filter the dataset. Filter so that you only keep Indonesia and Canada.

co2_cc <- full_join(co2, cc, by = "country") %>% 
  filter(country %in% c("Indonesia", "Canada"))

P.4

Select:

  • the “country” column
  • data from the years 2014 originally in BOTH DATASETS (columns “CO2_2014” and 2014)
  • the “Indicator” column

Rename:

  • emissions = CO2_2014
  • disasters = 2014

Reassign to “co2_cc”.

co2_cc <- co2_cc %>% select(country, CO2_2014, Indicator, `2014`) %>%
  rename(emissions = CO2_2014, disasters = `2014`)

P.5

Use stringr to trim the piece of text, “Climate related disasters frequency, Number of Disasters:”, from the “Indicator” column. You will use the function str_remove() to do this. It works similarly to other stringr functions. Try to intuit how it works by using the documentation page (?str_remove).

library(stringr)
co2_cc <- co2_cc %>% mutate(Indicator = str_remove(
  Indicator,
  "Climate related disasters frequency, Number of Disasters: "
))

P.6

Pivot the dataset so that there are columns for country, emissions, and a column for each “Indicator”.

co2_cc %>% pivot_wider(
  names_from = Indicator, 
  values_from = disasters
)
## # A tibble: 2 × 9
##   country   emissions Drought `Extreme temperature` Flood Storm TOTAL Wildfire
##   <chr>         <dbl>   <dbl>                 <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Canada       537000      NA                     1     1    NA     2       NA
## 2 Indonesia    464000      NA                    NA     6    NA     8       NA
## # ℹ 1 more variable: Landslide <dbl>