25th April 2018
J.J. Allaire & François Chollet : https://www.manning.com/books/deep-learning-with-r
Leon Eyrich Jessen : https://tensorflow.rstudio.com/blog/dl-for-cancer-immunotherapy.html
R Keras : https://keras.rstudio.com/
Python Keras : https://keras.io/
TensorFlow : https://www.tensorflow.org/
Andrew Ng : https://www.coursera.org/learn/machine-learning
iedb_peps %>% sample_n(5)
## # A tibble: 5 x 27
##   `Epitope ID` `Object Type` Description `Epitope Modifi~ `Epitope Modifi~
##          <int> <chr>         <chr>       <chr>            <chr>
## 1       468253 Linear pepti~ LPTVPGSSI   <NA>             <NA>
## 2       509910 Linear pepti~ LPQEIKANV   <NA>             <NA>
## 3       463972 Linear pepti~ DAFNTTFISTI <NA>             <NA>
## 4       450283 Linear pepti~ YPPAPFMHI   <NA>             <NA>
## 5       464203 Linear pepti~ EAAEVILRV   <NA>             <NA>
## # ... with 22 more variables: `Starting Position` <int>,
## #   `Ending Position` <int>, `Non-peptidic epitope Accession` <chr>,
## #   `Epitope Synonyms` <chr>, `Antigen Name` <chr>, `Parent Protein` <chr>,
## #   `Parent Protein Accession` <chr>, `Organism Name` <chr>,
## #   `Parent Organism` <chr>, `Parent Organism ID` <int>,
## #   `Epitope Comments` <chr>, `Epitope Relationship` <chr>,
## #   `Object Type_1` <chr>, Description_1 <chr>,
## #   `Starting Position_1` <int>, `Ending Position_1` <int>,
## #   `Non-peptidic object Accession` <chr>, Synonyms <chr>,
## #   `Antigen Name_1` <chr>, `Parent Protein_1` <chr>,
## #   `Organism Name_1` <chr>, `Parent Organism_1` <chr>
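For orientation, a minimal sketch of how an IEDB export like this could be loaded and trimmed to the fields used later. The file name and the 9-mer filter are assumptions; the column names are taken from the printout above.

library(tidyverse)

# Load an IEDB linear-epitope CSV export (file name assumed)
iedb_peps <- read_csv("iedb_export.csv")

# Keep 9-mer peptides and the columns of interest
iedb_peps %>%
  filter(nchar(Description) == 9) %>%
  select(peptide = Description,
         antigen = `Antigen Name`,
         protein = `Parent Protein`)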
pep_dat %>% sample_n(10)
## # A tibble: 10 x 4
##    peptide   label_chr label_num data_type
##    <chr>     <chr>         <int> <chr>
##  1 RAIDFSPYL C03               3 train
##  2 LPPPPMQYI B51               2 train
##  3 FSIKMFLTV C15               4 train
##  4 DPYATAFLV B51               2 train
##  5 FALGAGTAL C03               3 test
##  6 SIIDEFLHI C15               4 train
##  7 NADSATRLL C03               3 train
##  8 FASTILHLV C15               4 train
##  9 ILRNPGFAL C03               3 train
## 10 GEVAPSMFL B40               1 train
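pep_dat holds raw peptide strings, but the network on the next slide expects a numeric matrix of 9 residue positions x 20 amino acids = 180 inputs per peptide. A minimal sketch of one way to get there using plain one-hot encoding (Jessen's linked post uses its own encoder from the PepTools package; encode_peptide here is a hypothetical helper):

library(keras)
library(dplyr)

aa <- strsplit("ARNDCQEGHILKMFPSTWYV", "")[[1]]  # 20 amino acids (order arbitrary)

# Hypothetical helper: one-hot encode a 9-mer into a length-180 vector
encode_peptide <- function(pep) {
  idx <- match(strsplit(pep, "")[[1]], aa)  # residue letters -> 1..20
  m <- matrix(0, nrow = 9, ncol = 20)
  m[cbind(1:9, idx)] <- 1                   # one 1 per position
  as.vector(t(m))                           # flatten, position-major
}

train   <- pep_dat %>% filter(data_type == "train")
x_train <- t(sapply(train$peptide, encode_peptide))
y_train <- to_categorical(train$label_num, num_classes = 5)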
# Input shape: 9 residue positions x 20 amino acids = 180
# Output shape: 5 MHC I allotypes
# relu activation: performs non-linear transformations,
#                  expanding the hypothesis space
# Dropout layers: help deal with overfitting by randomly dropping
#                 features from the output of the preceding layer

# Initialise sequential model
model <- keras_model_sequential()

# Build architecture
model %>%
  layer_dense(units = 180, activation = 'relu', input_shape = 180) %>%
  layer_dropout(rate = 0.4) %>%
  layer_dense(units = 90, activation = 'relu') %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 5, activation = 'softmax') # Return probability
                                                 # scores for each class

# Total params: 49,325
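The parameter count quoted above follows directly from the dense layers, since each contributes (inputs x units) weights plus one bias per unit; summary(model) prints the same breakdown (layer names will vary with your session):

summary(model)
# dense (180 units): (180 x 180) + 180 = 32,580
# dense (90 units):  (180 x 90)  + 90  = 16,290
# dense (5 units):   (90 x 5)    + 5   =    455
#                                Total = 49,325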
# Compile model
model %>% compile(
  loss      = 'categorical_crossentropy', # Feedback loss function, distance
  optimizer = optimizer_rmsprop(),        # Gradient descent rules
  metrics   = c('accuracy'))              # How often we match the target

# Train the model
history <- model %>% fit(
  x_train, y_train,
  epochs           = 10,  # 10 iterations over all training data
  batch_size       = 80,  # 80 peptides per batch
  validation_split = 0.2) # Hold out 20% of training data for validation
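Once fit returns, the learning curves and held-out performance can be inspected in the usual Keras way; a sketch, assuming x_test and y_test were encoded the same way as the training split (the test split is visible in pep_dat's data_type column):

# Training and validation loss/accuracy per epoch
plot(history)

# Held-out loss and accuracy
model %>% evaluate(x_test, y_test)

# Predicted allotype = class with the highest softmax probability
y_prob <- model %>% predict(x_test)
y_pred <- apply(y_prob, 1, which.max) - 1  # zero-based, as in label_num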
shared_peps %>%
  filter(label_chr.y == "NB") %>%
  inner_join(pep_dat, by = "peptide") %>%
  sample_n(6) %>%
  select(peptide, model_class = label_chr.x, iedb_class = label_chr)
## # A tibble: 6 x 3
##   peptide   model_class iedb_class
##   <chr>     <chr>       <chr>
## 1 NAFLEKKTV B51         B51
## 2 LILTPTREL C03         C03
## 3 VNGRLLLTL C15         C15
## 4 DAALDFKNV B51         B51
## 5 FDAKRLIGR A31         A31
## 6 QLGQFLQHR A31         A31
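To put a number on the agreement shown above across all shared peptides, rather than a sample of six, the same join can be summarised; a sketch reusing the column names from the snippet above:

# Fraction of shared peptides where model and IEDB classes agree
shared_peps %>%
  inner_join(pep_dat, by = "peptide") %>%
  summarise(agreement = mean(label_chr.x == label_chr, na.rm = TRUE))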
slides: http://ab604.github.io/mhc_tensorflow_presentation.html
email: ab604@soton.ac.uk
twitter: @alistair604