getwd()
Ideally, you want to keep everything (all of your data files and your Rmarkdown file/R scripts) in your working directory. To see where your working directory is, use getwd() with no arguments.
# Print the path of the current working directory.
getwd()
## [1] "/Users/kelseykeith/Documents/fels_git_repository_Fels_Bioinformatics_Meetup/week7"
setwd()
However, if you need to set your working directory to somewhere other than where your Rmarkdown file/R script is (not recommended), use setwd() with the path to the directory you want to use. (NOTE: Any file path I put here will NOT work if you try running it on your computer. That’s the downside of setwd(): the path only exists on the computer it was written for. Try setting your own working directory instead.)
#setwd('/this_is_a_fake_path/to_show_function_syntax/')
dir()
You might also want to see what’s in your working directory. To do that, use dir().
# List the files and folders in the current working directory.
dir()
## [1] "biopsy_margins.png"
## [2] "biostat_class_files"
## [3] "cultivar_color.csv"
## [4] "cultivar_color.tsv"
## [5] "demo_files"
## [6] "import_data_and_review.html"
## [7] "import_data_and_review.Rmd"
## [8] "mess_up_datasets_for_week7.Rmd"
## [9] "practice_files"
## [10] "presentation_files"
## [11] "sparrow_survival.tsv"
## [12] "sparrows_weight_by_age_sex.png"
## [13] "week7_practice_combine_skills_ANSWERS.Rmd"
## [14] "wine_cult.png"
read_tsv()
Reads in tab separated value (tsv) files
# Read the tab-separated wine data; the path is relative to the working
# directory ('./' means "start from the current directory").
wine <- read_tsv('./demo_files/wine.tsv')
## Parsed with column specification:
## cols(
## Cultivar = col_integer(),
## Alcohol = col_double(),
## MalicAcid = col_double(),
## Ash = col_double(),
## Magnesium = col_integer(),
## TotalPhenol = col_double(),
## Flavanoids = col_double(),
## NonflavPhenols = col_double(),
## Color = col_double()
## )
read_csv()
Reads in comma separated value (csv) files
# Read the comma-separated sparrow measurements into a table.
sparrows <- read_csv('demo_files/sparrows.csv')
## Parsed with column specification:
## cols(
## Sex = col_character(),
## Age = col_character(),
## Survival = col_character(),
## Length = col_integer(),
## Wingspread = col_integer(),
## Weight = col_double(),
## Skull_Length = col_double(),
## Humerus_Length = col_double(),
## Femur_Length = col_double(),
## Tarsus_Length = col_double(),
## Sternum_Length = col_double(),
## Skull_Width = col_double()
## )
read_delim()
Reads in delimited files of any kind. You have to specify what the file is delimited by using the delim argument.
# The biopsy file is read with a single space as the delimiter, so the
# delimiter has to be passed explicitly.
biopsy <- read_delim('demo_files/biopsy.txt', delim = ' ')
## Parsed with column specification:
## cols(
## clump_thickness = col_integer(),
## uniform_cell_size = col_integer(),
## uniform_cell_shape = col_integer(),
## marg_adhesion = col_integer(),
## epithelial_cell_size = col_integer(),
## bare_nuclei = col_integer(),
## bland_chromatin = col_integer(),
## normal_nucleoli = col_integer(),
## mitoses = col_integer(),
## outcome = col_character()
## )
Sometimes there aren’t column names, or the column names are in a file header or are not read in properly. This example has column names in a commented header.
# Deliberately naive read to show the problem: the '##' header line is taken
# as a single column name, so the data rows are reported as parsing failures.
rowan <- read_csv('demo_files/rowan.csv')
## Parsed with column specification:
## cols(
## `##altitude resp.rate species leaf.len nesting` = col_integer()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 300 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 1 <NA> 1 columns 5 columns 'demo_files/rowan.csv' file 2 2 <NA> 1 columns 5 columns 'demo_files/rowan.csv' row 3 3 <NA> 1 columns 5 columns 'demo_files/rowan.csv' col 4 4 <NA> 1 columns 5 columns 'demo_files/rowan.csv' expected 5 5 <NA> 1 columns 5 columns 'demo_files/rowan.csv'
## ... ................. ... ........................................................ ........ ........................................................ ...... ........................................................ .... ........................................................ ... ........................................................ ... ........................................................ ........ ........................................................
## See problems(...) for more details.
We can solve the problem by specifying the column names ourselves and telling the function that the header line is a comment. Alternatively, skipping the first line does the same thing.
# Supply the column names by hand and treat anything starting with '#' as a
# comment, so the commented header line is ignored.
rowan <- read_csv(
  'demo_files/rowan.csv',
  col_names = c('altitude', 'resp.rate', 'species', 'leaf.len', 'nesting'),
  comment = '#'
)
## Parsed with column specification:
## cols(
## altitude = col_integer(),
## resp.rate = col_double(),
## species = col_character(),
## leaf.len = col_double(),
## nesting = col_character()
## )
# Supply the column names by hand and skip the first line of the file, which
# holds the commented header.
rowan <- read_csv(
  'demo_files/rowan.csv',
  col_names = c('altitude', 'resp.rate', 'species', 'leaf.len', 'nesting'),
  skip = 1
)
## Parsed with column specification:
## cols(
## altitude = col_integer(),
## resp.rate = col_double(),
## species = col_character(),
## leaf.len = col_double(),
## nesting = col_character()
## )
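read_*() functions guess each column’s data type from the first 1,000 rows. Guess where this always fails? Chromosomes! The first rows usually hold numbered chromosomes (1, 2, 3, ...), so the column is guessed to be an integer and later values such as X or Y fail to parse; set the column type yourself with the col_types argument to avoid this.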
When you want to save a data table that you’ve made inside R, you have to write the table.
write_tsv()
This will save your table with tabs to delimit the data.
# change something about wine: keep only the cultivar and color columns
wine_cult_col <- wine %>%
  select(Cultivar, Color)
# save it as a tsv
write_tsv(wine_cult_col, 'cultivar_color.tsv')
write_csv()
This will save your table with commas to delimit the data.
# Select the two columns and pipe the result straight into write_csv(),
# without storing an intermediate table.
wine %>%
  select(Cultivar, Color) %>%
  write_csv('cultivar_color.csv')
write_delim()
You can specify what you want as a delimiter using the delim argument of write_delim().
# Count the sparrows in every Sex/Age/Survival combination. Passing the
# columns to count() directly gives the same rows as group_by() + count(),
# but returns an ungrouped table so no grouping leaks into later steps.
sparrow_survival <- sparrows %>%
  count(Sex, Age, Survival)
# delim = '\t' makes write_delim() produce a tab-separated file
write_delim(sparrow_survival, 'sparrow_survival.tsv', delim = '\t')
# Open a PNG graphics device; everything drawn until dev.off() goes into
# this file instead of the screen.
png('sparrows_weight_by_age_sex.png')
# ggplot objects are only drawn when printed. The explicit print() keeps the
# image from coming out blank when this code is run via source() or inside a
# function, where top-level auto-printing does not happen.
print(
  ggplot(sparrows, aes(x = Age, y = Weight, fill = Age)) +
    geom_violin(alpha = 0.8) +
    scale_fill_manual(values = c('darkcyan', 'hotpink')) +
    facet_wrap(~ Sex) +
    theme_classic() +
    theme(legend.position = 'none')
)
# Close the device so the file is actually written to disk.
dev.off()
## quartz_off_screen
## 2
ggsave()
ggplot has its own way of saving plots, ggsave(). By default it saves the last plot that was displayed, or you can specify which plot to save. It also detects the image file type from the extension of the filename you give it.
# automatic: display the plot, then let ggsave() pick up the last plot shown
ggplot(
  data = biopsy,
  mapping = aes(x = marg_adhesion, fill = outcome)
) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = c('darkgray', 'firebrick')) +
  labs(x = 'margin adhesion') +
  theme_classic()
# No plot argument is given, so the plot displayed just above is saved.
ggsave('biopsy_margins.png')
## Saving 6 x 4 in image
If you save the plot as an object, you can tell ggsave() which plot object you want to save using the plot argument.
# specify saved plot: store the plot in an object, then hand that object to
# ggsave() explicitly
wine_plot <- ggplot(wine, aes(x = Alcohol, y = Ash, color = as.factor(Cultivar))) +
  geom_point() +
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  scale_color_viridis(discrete = TRUE) +
  labs(color = 'Cultivar') +
  theme_classic()
ggsave('wine_cult.png', plot = wine_plot)
## Saving 6 x 4 in image
# NOTE(review): this overwrites the `biopsy` table loaded earlier with
# read_delim(). The file starts with a '###' header line, so this naive read
# yields a single column and reports parsing failures.
biopsy <- read_tsv('demo_files/biopsy_inclass_demo.tsv')
## Parsed with column specification:
## cols(
## `### tumor_params = clump_thickness;uniform_cell_size;uniform_cell_shape;marg_adhesion;epithelial_cell_size;bare_nuclei;bland_chromatin;normal_nucleoli;mitoses` = col_character()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 684 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 1 <NA> 1 columns 2 columns 'demo_files/biopsy_inclass_demo.tsv' file 2 2 <NA> 1 columns 2 columns 'demo_files/biopsy_inclass_demo.tsv' row 3 3 <NA> 1 columns 2 columns 'demo_files/biopsy_inclass_demo.tsv' col 4 4 <NA> 1 columns 2 columns 'demo_files/biopsy_inclass_demo.tsv' expected 5 5 <NA> 1 columns 2 columns 'demo_files/biopsy_inclass_demo.tsv'
## ... ................. ... ...................................................................... ........ ...................................................................... ...... ...................................................................... .... ...................................................................... ... ...................................................................... ... ...................................................................... ........ ......................................................................
## See problems(...) for more details.