2020 |
Papadimitriou, Sofia Towards multivariant pathogenicity predictions: Using machine-learning to directly predict and explore disease-causing oligogenic variant combinations PhD Thesis 2020, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/312576, title = {Towards multivariant pathogenicity predictions: Using machine-learning to directly predict and explore disease-causing oligogenic variant combinations}, author = {Sofia Papadimitriou}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/312576}, year = {2020}, date = {2020-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Rasoafaraniaina, Rondrotiana J Preliminary test estimation in uniformly locally and asymptotically normal models PhD Thesis 2020, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/312253, title = {Preliminary test estimation in uniformly locally and asymptotically normal models}, author = {Rondrotiana J Rasoafaraniaina}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/312253}, year = {2020}, date = {2020-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Pelicaen, Rudy Genome-scale metabolic modeling of candidate functional starter cultures for cocoa bean fermentation PhD Thesis 2020, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/308886, title = {Genome-scale metabolic modeling of candidate functional starter cultures for cocoa bean fermentation}, author = {Rudy Pelicaen}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/308886}, year = {2020}, date = {2020-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Cutting, Christine Testing uniformity against rotationally symmetric alternatives on high-dimensional spheres PhD Thesis 2020, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/306900, title = {Testing uniformity against rotationally symmetric alternatives on high-dimensional spheres}, author = {Christine Cutting}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/306900}, year = {2020}, date = {2020-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
2019 |
Davila, Caroll Isabelle Weneya´a – “quien habla con los cerros”. Memoria, mántica y paisaje sagrado en la Sierra Norte de Oaxaca PhD Thesis 2019, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/285389, title = {Weneya´a – “quien habla con los cerros”. Memoria, mántica y paisaje sagrado en la Sierra Norte de Oaxaca}, author = {Davila, Caroll Isabelle}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/285389}, year = {2019}, date = {2019-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Duerinckx, Sarah Monogenic and Digenic Inheritance of Primary Microcephaly PhD Thesis 2019, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/297626, title = {Monogenic and Digenic Inheritance of Primary Microcephaly}, author = {Sarah Duerinckx}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/297626}, year = {2019}, date = {2019-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
2018 |
Dierckxsens, Nicolas Targeted organelle genome assembly and heteroplasmy detection PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/277507b, title = {Targeted organelle genome assembly and heteroplasmy detection}, author = {Nicolas Dierckxsens}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/277507}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Porretta, Luciano Models and Methods in Genome Wide Association Studies PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/265314b, title = {Models and Methods in Genome Wide Association Studies}, author = {Porretta, Luciano}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/265314}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Chen, Jixin Some Domain Decomposition and Convex Optimization Algorithms with Applications to Inverse Problems PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/271782, title = {Some Domain Decomposition and Convex Optimization Algorithms with Applications to Inverse Problems}, author = {Jixin Chen}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/271782}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Carcillo, Fabrizio Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/272119b, title = {Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning}, author = {Fabrizio Carcillo}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/272119/5/ContratDiCarcillo.pdf}, year = {2018}, date = {2018-01-01}, abstract = {The expansion of the electronic commerce, as well as the increasing confidence of customers in electronic payments, makes fraud detection a critical issue. The design of a prompt and accurate Fraud Detection System is a priority for many organizations in the business of credit cards. In this thesis we present a series of studies to increase the precision and the speed of fraud detection systems. The thesis has three main contributions. The first concerns the integration of unsupervised techniques and supervised classifiers. We proposed several approaches to integrate outlier scores in the detection process and we found that the accuracy of a conventional classifier may be improved when information about the input distribution is used to augment the training set. The second contribution concerns the role of active learning in Fraud Detection. We have extensively compared several state-of-the-art techniques and found that Stochastic Semi-supervised Learning is a convenient approach to tackle the Selection Bias problem in the active learning process. The third contribution of the thesis is the design, implementation and assessment of SCARFF, an original framework for near real-time Streaming Fraud Detection. This framework integrates Big Data technology (notably tools like Kafka, Spark and Cassandra) with a machine learning approach to deal with imbalance, non-stationarity and feedback latency in a scalable manner.
Experimental results on a massive dataset of real credit card transactions have shown that our framework is scalable, efficient and accurate over a big stream of transactions.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The expansion of the electronic commerce, as well as the increasing confidence of customers in electronic payments, makes fraud detection a critical issue. The design of a prompt and accurate Fraud Detection System is a priority for many organizations in the business of credit cards. In this thesis we present a series of studies to increase the precision and the speed of fraud detection systems. The thesis has three main contributions. The first concerns the integration of unsupervised techniques and supervised classifiers. We proposed several approaches to integrate outlier scores in the detection process and we found that the accuracy of a conventional classifier may be improved when information about the input distribution is used to augment the training set. The second contribution concerns the role of active learning in Fraud Detection. We have extensively compared several state-of-the-art techniques and found that Stochastic Semi-supervised Learning is a convenient approach to tackle the Selection Bias problem in the active learning process. The third contribution of the thesis is the design, implementation and assessment of SCARFF, an original framework for near real-time Streaming Fraud Detection. This framework integrates Big Data technology (notably tools like Kafka, Spark and Cassandra) with a machine learning approach to deal with imbalance, non-stationarity and feedback latency in a scalable manner. Experimental results on a massive dataset of real credit card transactions have shown that our framework is scalable, efficient and accurate over a big stream of transactions. |
Reggiani, Claudio Bioinformatic discovery of novel exons expressed in human brain and their association with neurodevelopmental disorders PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/270994c, title = {Bioinformatic discovery of novel exons expressed in human brain and their association with neurodevelopmental disorders}, author = {Claudio Reggiani}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/270994}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Bizet, Martin Bioinformatic inference of a prognostic epigenetic signature of immunity in breast cancers PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/265092b, title = {Bioinformatic inference of a prognostic epigenetic signature of immunity in breast cancers}, author = {Martin Bizet}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/265092/7/ContratDiBizet.pdf}, year = {2018}, date = {2018-01-01}, abstract = {L’altération des marques épigénétiques est de plus en plus reconnue comme une caractéristique fondamentale des cancers. Dans cette thèse, nous avons utilisé des profils de méthylation de l’ADN en vue d’améliorer la classification des patients atteints du cancer du sein grâce à une approche basée sur l’apprentissage automatique. L’objectif à long terme est le développement d’outils cliniques de médecine personnalisée. Les données de méthylation de l’ADN furent acquises à l’aide d’une puce à ADN dédiée à la méthylation, appelée Infinium. Cette technologie est récente comparée, par exemple, aux puces d’expression génique et son prétraitement n’est pas encore standardisé. La première partie de cette thèse fut donc consacrée à l’évaluation des méthodes de normalisation par comparaison des données normalisées avec d’autres technologies (pyroséquençage et RRBS) pour les deux technologies Infinium les plus récentes (450k et 850k). Nous avons également évalué la couverture de régions biologiquement relevantes (promoteurs et amplificateurs) par les deux technologies. Ensuite, nous avons utilisé les données Infinium (correctement prétraitées) pour développer un score, appelé MeTIL score, qui présente une valeur pronostique et prédictive dans les cancers du sein.
Nous avons profité de la capacité de la méthylation de l’ADN à refléter la composition cellulaire pour extraire une signature de méthylation (c’est-à-dire un ensemble de positions de l’ADN où la méthylation varie) qui reflète la présence de lymphocytes dans l’échantillon tumoral. Après une sélection de sites présentant une méthylation spécifique aux lymphocytes, nous avons développé une approche basée sur l’apprentissage automatique pour obtenir une signature d’une taille optimale réduite à cinq sites permettant potentiellement une utilisation en clinique. Après conversion de cette signature en un score, nous avons montré sa spécificité pour les lymphocytes à l’aide de données externes et de simulations informatiques. Puis, nous avons montré la capacité du MeTIL score à prédire la réponse à la chimiothérapie ainsi que son pouvoir pronostique dans des cohortes indépendantes de cancer du sein et, même, dans d’autres cancers.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } L’altération des marques épigénétiques est de plus en plus reconnue comme une caractéristique fondamentale des cancers. Dans cette thèse, nous avons utilisé des profils de méthylation de l’ADN en vue d’améliorer la classification des patients atteints du cancer du sein grâce à une approche basée sur l’apprentissage automatique. L’objectif à long terme est le développement d’outils cliniques de médecine personnalisée. Les données de méthylation de l’ADN furent acquises à l’aide d’une puce à ADN dédiée à la méthylation, appelée Infinium. Cette technologie est récente comparée, par exemple, aux puces d’expression génique et son prétraitement n’est pas encore standardisé.
La première partie de cette thèse fut donc consacrée à l’évaluation des méthodes de normalisation par comparaison des données normalisées avec d’autres technologies (pyroséquençage et RRBS) pour les deux technologies Infinium les plus récentes (450k et 850k). Nous avons également évalué la couverture de régions biologiquement relevantes (promoteurs et amplificateurs) par les deux technologies. Ensuite, nous avons utilisé les données Infinium (correctement prétraitées) pour développer un score, appelé MeTIL score, qui présente une valeur pronostique et prédictive dans les cancers du sein. Nous avons profité de la capacité de la méthylation de l’ADN à refléter la composition cellulaire pour extraire une signature de méthylation (c’est-à-dire un ensemble de positions de l’ADN où la méthylation varie) qui reflète la présence de lymphocytes dans l’échantillon tumoral. Après une sélection de sites présentant une méthylation spécifique aux lymphocytes, nous avons développé une approche basée sur l’apprentissage automatique pour obtenir une signature d’une taille optimale réduite à cinq sites permettant potentiellement une utilisation en clinique. Après conversion de cette signature en un score, nous avons montré sa spécificité pour les lymphocytes à l’aide de données externes et de simulations informatiques. Puis, nous avons montré la capacité du MeTIL score à prédire la réponse à la chimiothérapie ainsi que son pouvoir pronostique dans des cohortes indépendantes de cancer du sein et, même, dans d’autres cancers. |
Gazzo, Andrea Beyond monogenic diseases: a first collection and analysis of digenic diseases PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/272617b, title = {Beyond monogenic diseases: a first collection and analysis of digenic diseases}, author = {Andrea Gazzo}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/272617/5/ContratDiGazzo.pdf}, year = {2018}, date = {2018-01-01}, abstract = {In the next generation sequencing era many bioinformatics tools have been developed for assisting scientists in their studies on the molecular basis of genetic diseases, often with the aim of identifying the pathogenic variants. As a consequence, in the last decades more than one hundred new disease-gene associations have been discovered. Nevertheless, the genetic basis of many genetic diseases yet remains undisclosed. It has been shown that many diseases considered as monogenic with an imperfect genotype-phenotype correlation or incomplete penetrance are, on the contrary, caused or modulated by more than one mutated gene, meaning that they are in fact oligogenic. Current bioinformatics methods used for identifying pathogenic variants are trained and fine-tuned for identifying a single variant responsible for a disease. This monogenic-oriented approach cannot be used to explore the impact of combinations of variants in different genes on the complexity and genetic heterogeneity of rare diseases. Digenic diseases are the simplest form of oligogenic disease and thus they can provide a conceptual bridge between monogenic and the poorly understood polygenic diseases. The ambition of this thesis is to collect and analyse digenic data, introducing this topic in the bioinformatics field where digenic diseases are still an unexplored branch.
This can be divided in two steps: the first consists in the creation of a central repository containing detailed information on digenic diseases; the second is an analysis of their peculiarities, using machine learning methods for studying subclasses of digenic effects. In the first step we developed DIDA (DIgenic diseases DAtabase), a novel database that provides for the first time a curated collection of genes and associated variants involved in digenic diseases. Detailed information related to the digenic mechanism has been manually mined from the medical literature. All instances in DIDA were also assigned to two sub classes of digenic effects, annotated as true digenic (both genes are required for developing the disease) and composite classes (one gene is sufficient to produce the disease phenotype, the second one alters it or changes significantly the age of onset). In the second step, we hypothesized that the digenic effect may be related to some biological properties characterizing digenic combinations. Using machine learning methods, we show that a set of variant, gene and higher-level features can differentiate between the true digenic and composite classes with high accuracy. Moreover, we show that a digenic effect decision profile, extracted from the predictive model, motivates why an instance is assigned to either of the two classes. Together, our results show that digenic disease data generates novel insights, providing a glimpse into the oligogenic realm.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } In the next generation sequencing era many bioinformatics tools have been developed for assisting scientists in their studies on the molecular basis of genetic diseases, often with the aim of identifying the pathogenic variants. As a consequence, in the last decades more than one hundred new disease-gene associations have been discovered.
Nevertheless, the genetic basis of many genetic diseases yet remains undisclosed. It has been shown that many diseases considered as monogenic with an imperfect genotype-phenotype correlation or incomplete penetrance are, on the contrary, caused or modulated by more than one mutated gene, meaning that they are in fact oligogenic. Current bioinformatics methods used for identifying pathogenic variants are trained and fine-tuned for identifying a single variant responsible for a disease. This monogenic-oriented approach cannot be used to explore the impact of combinations of variants in different genes on the complexity and genetic heterogeneity of rare diseases. Digenic diseases are the simplest form of oligogenic disease and thus they can provide a conceptual bridge between monogenic and the poorly understood polygenic diseases. The ambition of this thesis is to collect and analyse digenic data, introducing this topic in the bioinformatics field where digenic diseases are still an unexplored branch. This can be divided in two steps: the first consists in the creation of a central repository containing detailed information on digenic diseases; the second is an analysis of their peculiarities, using machine learning methods for studying subclasses of digenic effects. In the first step we developed DIDA (DIgenic diseases DAtabase), a novel database that provides for the first time a curated collection of genes and associated variants involved in digenic diseases. Detailed information related to the digenic mechanism has been manually mined from the medical literature.
All instances in DIDA were also assigned to two sub classes of digenic effects, annotated as true digenic (both genes are required for developing the disease) and composite classes (one gene is sufficient to produce the disease phenotype, the second one alters it or changes significantly the age of onset). In the second step, we hypothesized that the digenic effect may be related to some biological properties characterizing digenic combinations. Using machine learning methods, we show that a set of variant, gene and higher-level features can differentiate between the true digenic and composite classes with high accuracy. Moreover, we show that a digenic effect decision profile, extracted from the predictive model, motivates why an instance is assigned to either of the two classes. Together, our results show that digenic disease data generates novel insights, providing a glimpse into the oligogenic realm. |
Reggiani, Claudio Bioinformatic discovery of novel exons expressed in human brain and their association with neurodevelopmental disorders PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/270994b, title = {Bioinformatic discovery of novel exons expressed in human brain and their association with neurodevelopmental disorders}, author = {Claudio Reggiani}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/270994/5/ContratDiReggiani.pdf}, year = {2018}, date = {2018-01-01}, abstract = {An important quest in genomics since the publication of the first complete human genome in 2003 has been its functional annotation. DNA holds the instructions to the production of the components necessary for the life of cells and organisms. A complete functional catalog of genomic regions will help the understanding of the cell body and its dynamics, thus creating links between genotype and phenotypic traits. The need for annotations prompted the development of several bioinformatic methods. In the context of promoter and first exon predictors, the majority of models relies principally on structural and chemical properties of the DNA sequence. Some of them integrate information from epigenomic and transcriptomic data as secondary features. Current genomic research asserts that reference genome annotations are far from being fully annotated (human organism included). Physicians rely on reference genome annotations and functional databases to understand disorders with genetic basis, and missing annotations may lead to unresolved cases. Because of their complexity, neurodevelopmental disorders are under study to figure out all genetic regions that are involved. Besides functional validation on model organisms, the search for genotype-phenotype association is supported by statistical analysis, which is typically biased towards known functional regions. This thesis addresses the use of an in-silico integrative analysis to improve reference genome annotations and discover novel functional regions associated with neurodevelopmental disorders.
The contributions outlined in this document have practical applications in clinical settings. The presented bioinformatic method is based on epigenomic and transcriptomic data, thus excluding features from DNA sequence. Such integrative approach applied on brain data allowed the discovery of two novel promoters and coding first exons in the human DLG2 gene, which were also found to be statistically associated with neurodevelopmental disorders and intellectual disability in particular. The application of the same methodology to the whole genome resulted in the discovery of other novel exons expressed in brain. Concerning the in-silico method itself, the research demanded a high number of functional and clinical datasets to properly support and validate our discoveries. This work describes a bioinformatic method for genome annotation, in the specific area of promoter and first exons. So far the method has been applied on brain data, and the extension to the whole body data would be a logical by-product. We will leverage distributed frameworks to tackle the even higher amount of data to analyse, a task that has already begun. Another interesting research direction that came up from this work is the temporal enrichment analysis of epigenomics data across different developmental stages, in which changes of epigenomic enrichment suggest time-specific and tissue-specific functional gene and gene isoforms regulation.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } An important quest in genomics since the publication of the first complete human genome in 2003 has been its functional annotation. DNA holds the instructions to the production of the components necessary for the life of cells and organisms. A complete functional catalog of genomic regions will help the understanding of the cell body and its dynamics, thus creating links between genotype and phenotypic traits.
The need for annotations prompted the development of several bioinformatic methods. In the context of promoter and first exon predictors, the majority of models relies principally on structural and chemical properties of the DNA sequence. Some of them integrate information from epigenomic and transcriptomic data as secondary features. Current genomic research asserts that reference genome annotations are far from being fully annotated (human organism included). Physicians rely on reference genome annotations and functional databases to understand disorders with genetic basis, and missing annotations may lead to unresolved cases. Because of their complexity, neurodevelopmental disorders are under study to figure out all genetic regions that are involved. Besides functional validation on model organisms, the search for genotype-phenotype association is supported by statistical analysis, which is typically biased towards known functional regions. This thesis addresses the use of an in-silico integrative analysis to improve reference genome annotations and discover novel functional regions associated with neurodevelopmental disorders. The contributions outlined in this document have practical applications in clinical settings. The presented bioinformatic method is based on epigenomic and transcriptomic data, thus excluding features from DNA sequence. Such integrative approach applied on brain data allowed the discovery of two novel promoters and coding first exons in the human DLG2 gene, which were also found to be statistically associated with neurodevelopmental disorders and intellectual disability in particular. The application of the same methodology to the whole genome resulted in the discovery of other novel exons expressed in brain.
Concerning the in-silico method itself, the research demanded a high number of functional and clinical datasets to properly support and validate our discoveries. This work describes a bioinformatic method for genome annotation, in the specific area of promoter and first exons. So far the method has been applied on brain data, and the extension to the whole body data would be a logical by-product. We will leverage distributed frameworks to tackle the even higher amount of data to analyse, a task that has already begun. Another interesting research direction that came up from this work is the temporal enrichment analysis of epigenomics data across different developmental stages, in which changes of epigenomic enrichment suggest time-specific and tissue-specific functional gene and gene isoforms regulation. |
Gazzo, Andrea Beyond monogenic diseases: a first collection and analysis of digenic diseases PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/272617c, title = {Beyond monogenic diseases: a first collection and analysis of digenic diseases}, author = {Andrea Gazzo}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/272617}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Carcillo, Fabrizio Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/272119c, title = {Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning}, author = {Fabrizio Carcillo}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/272119}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Bizet, Martin Bioinformatic inference of a prognostic epigenetic signature of immunity in breast cancers PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/265092c, title = {Bioinformatic inference of a prognostic epigenetic signature of immunity in breast cancers}, author = {Martin Bizet}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/265092}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Fimereli, Danai Computational analyses of gene fusions, viruses and parasitic genomic elements in breast cancer PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/263609b, title = {Computational analyses of gene fusions, viruses and parasitic genomic elements in breast cancer}, author = {Danai Fimereli}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/263609}, year = {2018}, date = {2018-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Gazzo, Andrea Beyond monogenic diseases: a first collection and analysis of digenic diseases PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/272617, title = {Beyond monogenic diseases: a first collection and analysis of digenic diseases}, author = {Andrea Gazzo}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/272617/5/ContratDiGazzo.pdf}, year = {2018}, date = {2018-01-01}, abstract = {In the next generation sequencing era many bioinformatics tools have been developed for assisting scientists in their studies on the molecular basis of genetic diseases, often with the aim of identifying the pathogenic variants. As a consequence, in the last decades more than one hundred new disease-gene associations have been discovered. Nevertheless, the genetic basis of many genetic diseases yet remains undisclosed. It has been shown that many diseases considered as monogenic with an imperfect genotype-phenotype correlation or incomplete penetrance are, on the contrary, caused or modulated by more than one mutated gene, meaning that they are in fact oligogenic. Current bioinformatics methods used for identifying pathogenic variants are trained and fine-tuned for identifying a single variant responsible of a disease. This monogenic-oriented approach cannot be used to explore the impact of combinations of variants in different genes on the complexity and genetic heterogeneity of rare diseases. Digenic diseases are the simplest form of oligogenic disease and thus they can provide a conceptual bridge between monogenic and the poorly understood polygenic diseases.The ambition of this thesis is to collect and analyse digenic data, introducing this topic in the bioinformatics field where digenic diseases are still an unexplored branch. 
This can be divided in two steps: the first consists in the creation of a central repository containing detailed information on digenic diseases; the second is an analysis of their peculiarities, using machine learning methods for studying subclasses of digenic effects.In the first step we developed DIDA (DIgenic diseases DAtabase), a novel database that provides for the first time a curated collection of genes and associated variants involved in digenic diseases. Detailed information related to the digenic mechanism have been manually mined from the medical literature. All instances in DIDA were also assigned to two sub classes of digenic effects, annotated as true digenic (both genes are required for developing the disease) and composite classes (one gene is sufficient to produce the disease phenotype, the second one alters it or change significantly the age of onset).In the second step, we hypothesized that the digenic effect may be related to some biological properties characterizing digenic combinations. Using machine learning methods, we show that a set of variant, gene and higher-level features can differentiate between the true digenic and composite classes with high accuracy. Moreover, we show that a digenic effect decision profile, extracted from the predictive model, motivates why an instance is assigned to either of the two classes.Together, our results show that digenic disease data generates novel insights, providing a glimpse into the oligogenic realm.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } In the next generation sequencing era many bioinformatics tools have been developed for assisting scientists in their studies on the molecular basis of genetic diseases, often with the aim of identifying the pathogenic variants. As a consequence, in the last decades more than one hundred new disease-gene associations have been discovered. 
Nevertheless, the genetic basis of many genetic diseases yet remains undisclosed. It has been shown that many diseases considered as monogenic with an imperfect genotype-phenotype correlation or incomplete penetrance are, on the contrary, caused or modulated by more than one mutated gene, meaning that they are in fact oligogenic. Current bioinformatics methods used for identifying pathogenic variants are trained and fine-tuned for identifying a single variant responsible for a disease. This monogenic-oriented approach cannot be used to explore the impact of combinations of variants in different genes on the complexity and genetic heterogeneity of rare diseases. Digenic diseases are the simplest form of oligogenic disease and thus they can provide a conceptual bridge between monogenic and the poorly understood polygenic diseases. The ambition of this thesis is to collect and analyse digenic data, introducing this topic in the bioinformatics field where digenic diseases are still an unexplored branch. This can be divided in two steps: the first consists in the creation of a central repository containing detailed information on digenic diseases; the second is an analysis of their peculiarities, using machine learning methods for studying subclasses of digenic effects. In the first step we developed DIDA (DIgenic diseases DAtabase), a novel database that provides for the first time a curated collection of genes and associated variants involved in digenic diseases. Detailed information related to the digenic mechanism has been manually mined from the medical literature. 
All instances in DIDA were also assigned to two subclasses of digenic effects, annotated as true digenic (both genes are required for developing the disease) and composite classes (one gene is sufficient to produce the disease phenotype, the second one alters it or changes significantly the age of onset). In the second step, we hypothesized that the digenic effect may be related to some biological properties characterizing digenic combinations. Using machine learning methods, we show that a set of variant, gene and higher-level features can differentiate between the true digenic and composite classes with high accuracy. Moreover, we show that a digenic effect decision profile, extracted from the predictive model, motivates why an instance is assigned to either of the two classes. Together, our results show that digenic disease data generates novel insights, providing a glimpse into the oligogenic realm. |
Reggiani, Claudio Bioinformatic discovery of novel exons expressed in human brain and their association with neurodevelopmental disorders PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/270994, title = {Bioinformatic discovery of novel exons expressed in human brain and their association with neurodevelopmental disorders}, author = {Claudio Reggiani}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/270994/5/ContratDiReggiani.pdf}, year = {2018}, date = {2018-01-01}, abstract = {An important quest in genomics since the publication of the first complete human genome in 2003 has been its functional annotation. DNA holds the instructions to the production of the components necessary for the life of cells and organisms. A complete functional catalog of genomic regions will help the understanding of the cell body and its dynamics, thus creating links between genotype and phenotypic traits. The need for annotations prompted the development of several bioinformatic methods. In the context of promoter and first exon predictors, the majority of models relies principally on structural and chemical properties of the DNA sequence. Some of them integrate information from epigenomic and transcriptomic data as secondary features. Current genomic research asserts that reference genome annotations are far from being fully annotated (human organism included). Physicians rely on reference genome annotations and functional databases to understand disorders with genetic basis, and missing annotations may lead to unresolved cases. Because of their complexity, neurodevelopmental disorders are under study to figure out all genetic regions that are involved. Besides functional validation on model organisms, the search for genotype-phenotype association is supported by statistical analysis, which is typically biased towards known functional regions. This thesis addresses the use of an in-silico integrative analysis to improve reference genome annotations and discover novel functional regions associated with neurodevelopmental disorders. 
The contributions outlined in this document have practical applications in clinical settings. The presented bioinformatic method is based on epigenomic and transcriptomic data, thus excluding features from DNA sequence. Such integrative approach applied on brain data allowed the discovery of two novel promoters and coding first exons in the human DLG2 gene, which were also found to be statistically associated with neurodevelopmental disorders and intellectual disability in particular. The application of the same methodology to the whole genome resulted in the discovery of other novel exons expressed in brain. Concerning the in-silico method itself, the research demanded a high number of functional and clinical datasets to properly support and validate our discoveries. This work describes a bioinformatic method for genome annotation, in the specific area of promoter and first exons. So far the method has been applied on brain data, and the extension to the whole body data would be a logical by-product. We will leverage distributed frameworks to tackle the even higher amount of data to analyse, a task that has already begun. Another interesting research direction that came up from this work is the temporal enrichment analysis of epigenomics data across different developmental stages, in which changes of epigenomic enrichment suggest time-specific and tissue-specific functional gene and gene isoforms regulation.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } An important quest in genomics since the publication of the first complete human genome in 2003 has been its functional annotation. DNA holds the instructions to the production of the components necessary for the life of cells and organisms. A complete functional catalog of genomic regions will help the understanding of the cell body and its dynamics, thus creating links between genotype and phenotypic traits. 
The need for annotations prompted the development of several bioinformatic methods. In the context of promoter and first exon predictors, the majority of models relies principally on structural and chemical properties of the DNA sequence. Some of them integrate information from epigenomic and transcriptomic data as secondary features. Current genomic research asserts that reference genome annotations are far from being fully annotated (human organism included). Physicians rely on reference genome annotations and functional databases to understand disorders with genetic basis, and missing annotations may lead to unresolved cases. Because of their complexity, neurodevelopmental disorders are under study to figure out all genetic regions that are involved. Besides functional validation on model organisms, the search for genotype-phenotype association is supported by statistical analysis, which is typically biased towards known functional regions. This thesis addresses the use of an in-silico integrative analysis to improve reference genome annotations and discover novel functional regions associated with neurodevelopmental disorders. The contributions outlined in this document have practical applications in clinical settings. The presented bioinformatic method is based on epigenomic and transcriptomic data, thus excluding features from DNA sequence. Such integrative approach applied on brain data allowed the discovery of two novel promoters and coding first exons in the human DLG2 gene, which were also found to be statistically associated with neurodevelopmental disorders and intellectual disability in particular. The application of the same methodology to the whole genome resulted in the discovery of other novel exons expressed in brain. 
Concerning the in-silico method itself, the research demanded a high number of functional and clinical datasets to properly support and validate our discoveries. This work describes a bioinformatic method for genome annotation, in the specific area of promoter and first exons. So far the method has been applied on brain data, and the extension to the whole body data would be a logical by-product. We will leverage distributed frameworks to tackle the even higher amount of data to analyse, a task that has already begun. Another interesting research direction that came up from this work is the temporal enrichment analysis of epigenomics data across different developmental stages, in which changes of epigenomic enrichment suggest time-specific and tissue-specific functional gene and gene isoforms regulation. |
Fimereli, Danai Computational analyses of gene fusions, viruses and parasitic genomic elements in breast cancer PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/263609, title = {Computational analyses of gene fusions, viruses and parasitic genomic elements in breast cancer}, author = {Danai Fimereli}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/263609/5/ContratDanaiFimereli.pdf}, year = {2018}, date = {2018-01-01}, abstract = {Breast cancer is the most common cancer in women and research efforts to unravel the underlying mechanisms that drive carcinogenesis are continuous. The emergence of high-throughput sequencing techniques and their constant advancement, in combination with large scale studies of genomic and transcriptomic data, allowed the identification of important genetic changes that take place in the breast cancer genome, including somatic mutations, copy number aberrations and genomic rearrangements. The overall aim of this thesis is to explore the presence of genetic changes that take place in the breast cancer transcriptome and their possible contribution to carcinogenesis. The aim of the first research study was the identification of expressed gene fusions in breast cancer and the study of their association with other genomic events. For achieving this, transcriptome sequencing and Single Nucleotide Polymorphism arrays data for a cohort of 55 tumors and 10 normal breast tissues were combined. Gene fusions were detected in the majority of the samples, with evident differences between breast cancer subtypes, where HER2+ samples had significantly more fusions than the other subtypes. The genome-wide analysis uncovered localization of fusion genes in specific chromosomes like 17, 8 or 20. 
Additionally, a positive correlation between the number of gene fusions and the number of amplifications was observed, including the association between fusions on chromosome 17 and the amplifications in HER2+ samples, which can be attributed to the highly rearranged genomes of these subtypes. Finally, the absence of highly recurrent fusions across this cohort adds to the notion that gene fusions in breast cancer are most likely private events, with the majority being “passenger” events. In the second research study, the aim was to identify a connection between viral infections and breast cancer by devising five different computational methods for the analysis of both transcriptome and exome data in a cohort of 58 breast tumors. Despite being able to detect viral sequences in our testing dataset, no significantly high numbers of viral sequences were detected in our samples. Specifically, viral sequences (~2-30 reads) were extracted belonging to viruses EBV, HHV6 and Merkel cell polyomavirus. Such low levels of viral expression direct against a viral etiology for breast cancer but one should not exclude possible cases of integrated but silent viruses. In the third research project, we analyzed in silico the transcriptional profiles of human endogenous retroviruses in breast cancer. Despite being scattered across the genome in large numbers, a number of ERVs are actively transcribed, consisting of a small percentage of the total mapped reads. Alongside protein coding genes and lncRNAs, they show distinct expression profiles across the different breast cancer subtypes with luminal and basal-like samples clearly separating from each other. Additionally, distinct profiles between ER+ and ER- samples were observed. 
Tumor specific ERV loci show an association with the immune status of the tumors, indicating that ERVs are reactivated in tumors and could play a role in the activation of the immune response cascade. The results presented in this thesis exhibit only in a small fragment the diversity and heterogeneity of the breast cancer transcriptome. The strength of the sequencing techniques allows the in depth detection of different genomic events. Gene fusions should be considered as part of the breast cancer transcriptome but their low recurrence across samples indicates for a role as passenger events. Under the light of existing results, viral infections do not play a significant role in breast cancer. On the other hand, human endogenous retroviruses, despite originating from exogenous viruses, seem to exhibit transcriptional profiles similar to those of normal genes, indicating that they are part of the genome’s transcriptional machinery.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Breast cancer is the most common cancer in women and research efforts to unravel the underlying mechanisms that drive carcinogenesis are continuous. The emergence of high-throughput sequencing techniques and their constant advancement, in combination with large scale studies of genomic and transcriptomic data, allowed the identification of important genetic changes that take place in the breast cancer genome, including somatic mutations, copy number aberrations and genomic rearrangements. The overall aim of this thesis is to explore the presence of genetic changes that take place in the breast cancer transcriptome and their possible contribution to carcinogenesis. The aim of the first research study was the identification of expressed gene fusions in breast cancer and the study of their association with other genomic events. 
For achieving this, transcriptome sequencing and Single Nucleotide Polymorphism arrays data for a cohort of 55 tumors and 10 normal breast tissues were combined. Gene fusions were detected in the majority of the samples, with evident differences between breast cancer subtypes, where HER2+ samples had significantly more fusions than the other subtypes. The genome-wide analysis uncovered localization of fusion genes in specific chromosomes like 17, 8 or 20. Additionally, a positive correlation between the number of gene fusions and the number of amplifications was observed, including the association between fusions on chromosome 17 and the amplifications in HER2+ samples, which can be attributed to the highly rearranged genomes of these subtypes. Finally, the absence of highly recurrent fusions across this cohort adds to the notion that gene fusions in breast cancer are most likely private events, with the majority being “passenger” events. In the second research study, the aim was to identify a connection between viral infections and breast cancer by devising five different computational methods for the analysis of both transcriptome and exome data in a cohort of 58 breast tumors. Despite being able to detect viral sequences in our testing dataset, no significantly high numbers of viral sequences were detected in our samples. Specifically, viral sequences (~2-30 reads) were extracted belonging to viruses EBV, HHV6 and Merkel cell polyomavirus. Such low levels of viral expression direct against a viral etiology for breast cancer but one should not exclude possible cases of integrated but silent viruses. In the third research project, we analyzed in silico the transcriptional profiles of human endogenous retroviruses in breast cancer. Despite being scattered across the genome in large numbers, a number of ERVs are actively transcribed, consisting of a small percentage of the total mapped reads. 
Alongside protein coding genes and lncRNAs, they show distinct expression profiles across the different breast cancer subtypes with luminal and basal-like samples clearly separating from each other. Additionally, distinct profiles between ER+ and ER- samples were observed. Tumor specific ERV loci show an association with the immune status of the tumors, indicating that ERVs are reactivated in tumors and could play a role in the activation of the immune response cascade. The results presented in this thesis exhibit only in a small fragment the diversity and heterogeneity of the breast cancer transcriptome. The strength of the sequencing techniques allows the in depth detection of different genomic events. Gene fusions should be considered as part of the breast cancer transcriptome but their low recurrence across samples indicates for a role as passenger events. Under the light of existing results, viral infections do not play a significant role in breast cancer. On the other hand, human endogenous retroviruses, despite originating from exogenous viruses, seem to exhibit transcriptional profiles similar to those of normal genes, indicating that they are part of the genome’s transcriptional machinery. |
Dierckxsens, Nicolas Targeted organelle genome assembly and heteroplasmy detection PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/277507, title = {Targeted organelle genome assembly and heteroplasmy detection}, author = {Nicolas Dierckxsens}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/277507/5/ContratDiDierckxsens.pdf}, year = {2018}, date = {2018-01-01}, abstract = {Thanks to the development of next-generation sequencing (NGS) technology, whole genome data can be readily obtained from a variety of samples. Since the massive increase in available sequencing data, the development of efficient assembly algorithms has become the new bottleneck. Almost every new released tool is based on the De Bruijn graph method, which focuses on assembling complete datasets with mathematical models. Although the decreasing sequencing costs made whole genome sequencing (WGS) the most straightforward and least laborious approach of gathering sequencing data, many research projects are only interested in the extranuclear genomes. Unfortunately, few of the available tools are specifically designed to efficiently retrieve these extranuclear genomes from WGS datasets. We developed a seed-and-extend algorithm that assembles organelle circular genomes from WGS data, starting from a single short seed sequence. The algorithm has been tested on several new (Gonioctena intermedia and Avicennia marina) and public (Arabidopsis thaliana and Oryza sativa) whole genome Illumina datasets and always outperformed other assemblers in assembly accuracy and contiguity. In our benchmark, NOVOPlasty assembled all genomes in less than 30 minutes with a maximum RAM memory requirement of 16 GB. 
NOVOPlasty is the only de novo assembler that provides a fast and straightforward manner to extract the extranuclear sequences from WGS data and generates one circular high quality contig. Heteroplasmy, the existence of multiple mitochondrial haplotypes within an individual, has been researched across different fields. Mitochondrial genome polymorphisms have been linked to multiple severe disorders and are of interest to evolutionary studies and forensic science. By utilizing ultra-deep sequencing, it is now possible to uncover previously undiscovered patterns of intra-individual polymorphism. However, it remains challenging to determine its source. Current available software can detect polymorphic sites but are not capable of determining the link between them. We therefore developed a new method to not only detect intra-individual polymorphisms within mitochondrial and chloroplast genomes, but also to look for linkage among polymorphic sites by assembling the sequence around each detected polymorphic site. Our benchmark study shows that this method can detect heteroplasmy more accurately than any method previously available and is the first tool that is able to completely or partially reconstruct the origin sequences for each intra-individual polymorphism.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Thanks to the development of next-generation sequencing (NGS) technology, whole genome data can be readily obtained from a variety of samples. Since the massive increase in available sequencing data, the development of efficient assembly algorithms has become the new bottleneck. Almost every new released tool is based on the De Bruijn graph method, which focuses on assembling complete datasets with mathematical models. 
Although the decreasing sequencing costs made whole genome sequencing (WGS) the most straightforward and least laborious approach of gathering sequencing data, many research projects are only interested in the extranuclear genomes. Unfortunately, few of the available tools are specifically designed to efficiently retrieve these extranuclear genomes from WGS datasets. We developed a seed-and-extend algorithm that assembles organelle circular genomes from WGS data, starting from a single short seed sequence. The algorithm has been tested on several new (Gonioctena intermedia and Avicennia marina) and public (Arabidopsis thaliana and Oryza sativa) whole genome Illumina datasets and always outperformed other assemblers in assembly accuracy and contiguity. In our benchmark, NOVOPlasty assembled all genomes in less than 30 minutes with a maximum RAM memory requirement of 16 GB. NOVOPlasty is the only de novo assembler that provides a fast and straightforward manner to extract the extranuclear sequences from WGS data and generates one circular high quality contig. Heteroplasmy, the existence of multiple mitochondrial haplotypes within an individual, has been researched across different fields. Mitochondrial genome polymorphisms have been linked to multiple severe disorders and are of interest to evolutionary studies and forensic science. By utilizing ultra-deep sequencing, it is now possible to uncover previously undiscovered patterns of intra-individual polymorphism. However, it remains challenging to determine its source. Current available software can detect polymorphic sites but are not capable of determining the link between them. We therefore developed a new method to not only detect intra-individual polymorphisms within mitochondrial and chloroplast genomes, but also to look for linkage among polymorphic sites by assembling the sequence around each detected polymorphic site. 
Our benchmark study shows that this method can detect heteroplasmy more accurately than any method previously available and is the first tool that is able to completely or partially reconstruct the origin sequences for each intra-individual polymorphism. |
Carcillo, Fabrizio Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/272119, title = {Beyond Supervised Learning in Credit Card Fraud Detection: A Dive into Semi-supervised and Distributed Learning}, author = {Fabrizio Carcillo}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/272119/5/ContratDiCarcillo.pdf}, year = {2018}, date = {2018-01-01}, abstract = {The expansion of the electronic commerce, as well as the increasing confidence of customers in electronic payments, makes of fraud detection a critical issue. The design of a prompt and accurate Fraud Detection System is a priority for many organizations in the business of credit cards. In this thesis we present a series of studies to increase the precision and the speed of fraud detection system. The thesis has three main contributions. The first concerns the integration of unsupervised techniques and supervised classifiers. We proposed several approaches to integrate outlier scores in the detection process and we found that the accuracy of a conventional classifier may be improved when information about the input distribution is used to augment the training set. The second contribution concerns the role of active learning in Fraud Detection. We have extensively compared several state-of-the-art techniques and found that Stochastic Semi-supervised Learning is a convenient approach to tackle the Selection Bias problem in the active learning process. The third contribution of the thesis is the design, implementation and assessment of SCARFF, an original framework for near real-time Streaming Fraud Detection. This framework integrates Big Data technology (notably tools like Kafka, Spark and Cassandra) with a machine learning approach to deal with imbalance, non-stationarity and feedback latency in a scalable manner. 
Experimental results on a massive dataset of real credit card transactions have shown that our framework is scalable, efficient and accurate over a big stream of transactions.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The expansion of the electronic commerce, as well as the increasing confidence of customers in electronic payments, makes of fraud detection a critical issue. The design of a prompt and accurate Fraud Detection System is a priority for many organizations in the business of credit cards. In this thesis we present a series of studies to increase the precision and the speed of fraud detection system. The thesis has three main contributions. The first concerns the integration of unsupervised techniques and supervised classifiers. We proposed several approaches to integrate outlier scores in the detection process and we found that the accuracy of a conventional classifier may be improved when information about the input distribution is used to augment the training set. The second contribution concerns the role of active learning in Fraud Detection. We have extensively compared several state-of-the-art techniques and found that Stochastic Semi-supervised Learning is a convenient approach to tackle the Selection Bias problem in the active learning process. The third contribution of the thesis is the design, implementation and assessment of SCARFF, an original framework for near real-time Streaming Fraud Detection. This framework integrates Big Data technology (notably tools like Kafka, Spark and Cassandra) with a machine learning approach to deal with imbalance, non-stationarity and feedback latency in a scalable manner. Experimental results on a massive dataset of real credit card transactions have shown that our framework is scalable, efficient and accurate over a big stream of transactions. |
Porretta, Luciano MODELS AND METHODS IN GENOME WIDE ASSOCIATION STUDIES PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/265314, title = {MODELS AND METHODS IN GENOME WIDE ASSOCIATION STUDIES}, author = {Luciano Porretta}, year = {2018}, date = {2018-01-01}, abstract = {The interdisciplinary field of systems biology has evolved rapidly over the last few years. Different disciplines have contributed to the development of both its experimental and theoretical branches. Although computational biology has been an increasing activity in computer science for more than two decades, it has been only in the past few years that optimization models have been increasingly developed and analyzed by researchers whose primary background is Operations Research (OR). This dissertation aims at contributing to the field of computational biology by applying mathematical programming to certain problems in molecular biology. Specifically, we address three problems in the domain of Genome Wide Association Studies: (i) the Pure Parsimony Haplotyping Under Uncertain Data Problem that consists in finding the minimum number of haplotypes necessary to explain a given set of genotypes containing possible reading errors; (ii) the Parsimonious Loss Of Heterozygosity Problem that consists of partitioning suspected polymorphisms from a set of individuals into a minimum number of deletion areas; (iii) and the Multiple Individuals Polymorphic Alu Insertion Recognition Problem that consists of finding the set of locations in the genome where ALU sequences are inserted in some individual(s). All three problems are NP-hard combinatorial optimization problems. Therefore, we analyse their combinatorial structure and we propose an exact approach to solution for each of them. 
The proposed models are efficient, accurate, compact, polynomial-sized and usable in all those cases for which the parsimony criterion is well suited for estimation.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The interdisciplinary field of systems biology has evolved rapidly over the last few years. Different disciplines have contributed to the development of both its experimental and theoretical branches. Although computational biology has been an increasing activity in computer science for more than two decades, it has been only in the past few years that optimization models have been increasingly developed and analyzed by researchers whose primary background is Operations Research (OR). This dissertation aims at contributing to the field of computational biology by applying mathematical programming to certain problems in molecular biology. Specifically, we address three problems in the domain of Genome Wide Association Studies: (i) the Pure Parsimony Haplotyping Under Uncertain Data Problem that consists in finding the minimum number of haplotypes necessary to explain a given set of genotypes containing possible reading errors; (ii) the Parsimonious Loss Of Heterozygosity Problem that consists of partitioning suspected polymorphisms from a set of individuals into a minimum number of deletion areas; (iii) and the Multiple Individuals Polymorphic Alu Insertion Recognition Problem that consists of finding the set of locations in the genome where ALU sequences are inserted in some individual(s). All three problems are NP-hard combinatorial optimization problems. Therefore, we analyse their combinatorial structure and we propose an exact approach to solution for each of them. The proposed models are efficient, accurate, compact, polynomial-sized and usable in all those cases for which the parsimony criterion is well suited for estimation. |
Bizet, Martin Bioinformatic inference of a prognostic epigenetic signature of immunity in breast cancers PhD Thesis 2018, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/265092, title = {Bioinformatic inference of a prognostic epigenetic signature of immunity in breast cancers}, author = {Martin Bizet}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/265092/7/ContratDiBizet.pdf}, year = {2018}, date = {2018-01-01}, abstract = {L’altération des marques épigénétiques est de plus en plus reconnue comme une caractéristique fondamentale des cancers. Dans cette thèse, nous avons utilisé des profils de méthylation de l’ADN en vue d’améliorer la classification des patients atteints du cancer du sein grâce à une approche basée sur l’apprentissage automatique. L’objectif à long terme est le développement d’outils cliniques de médecine personnalisée. Les données de méthylation de l’ADN furent acquises à l’aide d’une puce à ADN dédiée à la méthylation, appelée Infinium. Cette technologie est récente comparée, par exemple, aux puces d’expression génique et son prétraitement n’est pas encore standardisé. La première partie de cette thèse fut donc consacrée à l’évaluation des méthodes de normalisation par comparaison des données normalisées avec d’autres technologies (pyroséquençage et RRBS) pour les deux technologies Infinium les plus récentes (450k et 850k). Nous avons également évalué la couverture de régions biologiquement relevantes (promoteurs et amplificateurs) par les deux technologies. Ensuite, nous avons utilisé les données Infinium (correctement prétraitées) pour développer un score, appelé MeTIL score, qui présente une valeur pronostique et prédictive dans les cancers du sein. 
Nous avons profité de la capacité de la méthylation de l’ADN `a refléter la composition cellulaire pour extraire une signature de méthylation (c’est-`a-dire un ensemble de positions de l’ADN o`u la méthylation varie) qui refl`ete la présence de lymphocytes dans l’échantillon tumoral. Apr`es une sélection de sites présentant une méthylation spécifique aux lymphocytes, nous avons développé une approche basée sur l’apprentissage automatique pour obtenir une signature d’une tailleoptimale réduite `a cinq sites permettant potentiellement une utilisation en clinique. Apr`es conversion de cette signature en un score, nous avons montré sa spécificité pour les lymphocytes `a l’aide de données externes et de simulations informatiques. Puis, nous avons montré la capacité du MeTIL score `a prédire la réponse `a la chimiothérapie ainsi que son pouvoir pronostique dans des cohortes indépendantes de cancer du sein et, m^eme, dans d’autres cancers.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } L’altération des marques épigénétiques est de plus en plus reconnue comme une caractéristique fondamentale des cancers. Dans cette th`ese, nous avons utilisé des profils de méthylation de l’ADN en vue d’améliorer la classification des patients atteints du cancer du sein gr^ace `a une approche basée sur l’apprentissage automatique. L’objectif `a long terme est le développement d’outils cliniques de médecine personnalisée. Les données de méthylation de l’ADN furent acquises `a l’aide d’une puce `a ADN dédiée `a la méthylation, appelée Infinium. Cette technologie est récente comparée, par exemple, aux puces d’expression génique et son prétraitement n’est pas encore standardisé. 
La premi`ere partie de cette th`ese fut donc consacrée `a l’évaluation des méthodes de normalisation par comparaison des données normalisées avec d’autres technologies (pyroséquenccage et RRBS) pour les deux technologies Infinium les plus récentes (450k et 850k). Nous avons également évalué la couverture de régions biologiquement relevantes (promoteurs et amplificateurs) par les deux technologies. Ensuite, nous avons utilisé les données Infinium (correctement prétraitées) pour développer un score, appelé MeTIL score, qui présente une valeur pronostique et prédictive dans les cancers du sein. Nous avons profité de la capacité de la méthylation de l’ADN `a refléter la composition cellulaire pour extraire une signature de méthylation (c’est-`a-dire un ensemble de positions de l’ADN o`u la méthylation varie) qui refl`ete la présence de lymphocytes dans l’échantillon tumoral. Apr`es une sélection de sites présentant une méthylation spécifique aux lymphocytes, nous avons développé une approche basée sur l’apprentissage automatique pour obtenir une signature d’une tailleoptimale réduite `a cinq sites permettant potentiellement une utilisation en clinique. Apr`es conversion de cette signature en un score, nous avons montré sa spécificité pour les lymphocytes `a l’aide de données externes et de simulations informatiques. Puis, nous avons montré la capacité du MeTIL score `a prédire la réponse `a la chimiothérapie ainsi que son pouvoir pronostique dans des cohortes indépendantes de cancer du sein et, m^eme, dans d’autres cancers. |
2017 |
Brown, David Norman Application of phylogenetic inference methods to quantify intra-tumour heterogeneity and evolution of breast cancers PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for David Norman Brown (2017). Paragraph breaks in the abstract had been fused without a space and the diaeresis escape in "naive" had lost its backslash; both are repaired. NOTE(review): the same author, title and year also appear under key info:hdl:2013/260251b in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository URL and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/260251,
  author    = {Brown, David Norman},
  title     = {Application of phylogenetic inference methods to quantify intra-tumour heterogeneity and evolution of breast cancers},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/260251/6/ContratDavidBrown.pdf},
  abstract  = {Cancer related mortality is almost always due to metastatic dissemination of the primary disease. While research into the biological mechanisms that drive the metastatic cascade continues to unravel its molecular underpinnings, progress in our understanding of biological phenomena such as tumour heterogeneity and its relevance to the origins of distant recurrence or the emergence of resistance to therapy has been limited. In parallel to major breakthroughs in the development of high throughput molecular techniques, researchers have begun to utilise next generation sequencing to explore the relationship between primary and matched metastatic tumours in diverse types of neoplasia. Despite small cohort sizes and often, a limited number of matched metastases for each patient, pioneering studies have uncovered hitherto unknown biological processes such as the occurrence of organ specific metastatic lineages, polyclonal seeding and homing of metastatic cells to the primary tumour bed. While yet other studies continue to highlight the potential of genomic analyses, at the time this thesis was started, an in-depth knowledge of disease progression and metastatic dissemination was currently lacking in breast cancers. Herein, we employed phylogenetic inference methods to investigate intra-tumour heterogeneity and evolution of breast cancers. A combination of whole exome sequencing, custom ultra-deep resequencing and copy number profiling were applied to primary tumours and their associated metastases from ten autopsied breast cancer patients. Two modes of metastatic progression were observed. In the majority of cases, all distant metastases clustered on a branch separate from their primary lesion. Clonal frequency analysis of somatic mutations showed that the metastases had a monoclonal origin and descended from a common metastatic precursor. Alternatively, the primary tumour was clustered alongside metastases with early branches leading to distant organs. This dichotomy coincided with the clinical history of the patients whereby multiple seeding events from the primary tumour alongside cascading metastasis-to-metastasis disseminations occurred in treatment naïve de novo metastatic patients, whereas descent from a common metastatic precursor was observed in patients who underwent primary surgery followed by systemic treatment. The data also showed that a distant metastasis can be horizontally cross-seeded and finally revealed a correlation between the extent of somatic point mutations private to the distant lesions and patient overall survival. In an unrelated dataset of relapsed breast cancer patients with matched primary and distant lesions profiled using whole genome sequencing, the landscape of somatic alterations confirmed the time dependency of copy number aberrations implying that cancer phylogenies can be dated using a molecular clock. The work presented here harnesses the strength of high throughput genomic techniques and state of the art phylogenetic tools to tell the evolutionary history of breast cancers. Our results show that the linear and parallel models of metastatic dissemination which have been held as near doctrines for many years are overstated point of views of cancer progression. Beyond the biological insights, these results suggest that surgical excision of the primary tumour in de novo metastatic breast cancers might reduce dissemination in selected cases hence providing a potential biological rationale for this practice. Similarly, there is no strong evidence of benefit in overall survival from surgical resection of oligo-metastases in breast cancer. From our analyses, metastatic lesions constitute an additional source of seeding and heterogeneity in advanced breast cancer. The data presented here is too small to derive practice-changing evidence, but supports the concept that resecting isolated metastases may be of clinical benefit in oligo-metastatic breast cancer patients. In both cases, results from larger prospective studies are warranted.},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
}
Cancer related mortality is almost always due to metastatic dissemination of the primary disease. While research into the biological mechanisms that drive the metastatic cascade continues to unravel its molecular underpinnings, progress in our understanding of biological phenomena such as tumour heterogeneity and its relevance to the origins of distant recurrence or the emergence of resistance to therapy has been limited. In parallel to major breakthroughs in the development of high throughput molecular techniques, researchers have begun to utilise next generation sequencing to explore the relationship between primary and matched metastatic tumours in diverse types of neoplasia. Despite small cohort sizes and often, a limited number of matched metastases for each patient, pioneering studies have uncovered hitherto unknown biological processes such as the occurrence of organ specific metastatic lineages, polyclonal seeding and homing of metastatic cells to the primary tumour bed. While yet other studies continue to highlight the potential of genomic analyses, at the time this thesis was started, an in-depth knowledge of disease progression and metastatic dissemination was currently lacking in breast cancers. Herein, we employed phylogenetic inference methods to investigate intra-tumour heterogeneity and evolution of breast cancers. A combination of whole exome sequencing, custom ultra-deep resequencing and copy number profiling were applied to primary tumours and their associated metastases from ten autopsied breast cancer patients. Two modes of metastatic progression were observed. In the majority of cases, all distant metastases clustered on a branch separate from their primary lesion. Clonal frequency analysis of somatic mutations showed that the metastases had a monoclonal origin and descended from a common metastatic precursor. Alternatively, the primary tumour was clustered alongside metastases with early branches leading to distant organs. This dichotomy coincided with the clinical history of the patients whereby multiple seeding events from the primary tumour alongside cascading metastasis-to-metastasis disseminations occurred in treatment naïve de novo metastatic patients, whereas descent from a common metastatic precursor was observed in patients who underwent primary surgery followed by systemic treatment. The data also showed that a distant metastasis can be horizontally cross-seeded and finally revealed a correlation between the extent of somatic point mutations private to the distant lesions and patient overall survival. In an unrelated dataset of relapsed breast cancer patients with matched primary and distant lesions profiled using whole genome sequencing, the landscape of somatic alterations confirmed the time dependency of copy number aberrations implying that cancer phylogenies can be dated using a molecular clock. The work presented here harnesses the strength of high throughput genomic techniques and state of the art phylogenetic tools to tell the evolutionary history of breast cancers. Our results show that the linear and parallel models of metastatic dissemination which have been held as near doctrines for many years are overstated point of views of cancer progression. Beyond the biological insights, these results suggest that surgical excision of the primary tumour in de novo metastatic breast cancers might reduce dissemination in selected cases hence providing a potential biological rationale for this practice. Similarly, there is no strong evidence of benefit in overall survival from surgical resection of oligo-metastases in breast cancer. From our analyses, metastatic lesions constitute an additional source of seeding and heterogeneity in advanced breast cancer. The data presented here is too small to derive practice-changing evidence, but supports the concept that resecting isolated metastases may be of clinical benefit in oligo-metastatic breast cancer patients. In both cases, results from larger prospective studies are warranted. |
Raimondi, Daniele The effect of genome variation on human proteins: understanding variants and improving their deleteriousness prediction through extensive contextualisation PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Daniele Raimondi (2017). The abstract carried line-wrap damage from its source (words fused across line ends, stray hyphenation such as "com-putational"); spacing is restored and one doubled word removed, wording otherwise untouched. NOTE(review): the same author, title and year also appear under keys info:hdl:2013/251313 and info:hdl:2013/251313c in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository URL and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/251313b,
  author    = {Raimondi, Daniele},
  title     = {The effect of genome variation on human proteins: understanding variants and improving their deleteriousness prediction through extensive contextualisation},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/251313/5/ContratDiRaimondi.pdf},
  abstract  = {Rapid technological advances are providing unprecedented insights in the biological sciences, with massive amounts of data generated on genomic and protein sequences. These data continue to grow exponentially, and they are extremely valuable for computational tools where the effect of genomic variants on human health is predicted. State of the art tools in this field give varying results and only tend to agree in the case of single variants that are strongly correlated to disease. The aim of this work is to increase the reliability of these methods, as well as our understanding of the underlying biological mechanisms that lead to disease. We first developed machine learning (ML) based structural bioinformatics predictors that are able to predict molecular features of proteins from the sequence alone. We then used these tools for in silico analysis of the molecular effects of known variants on the affected proteins, and integrated these data with other heterogenous sources of information, such as the essentiality of a gene, that put the variants into their broader biological context. With this information we created DEOGEN, a novel predictor in this field, which is able to deal with the two most common forms of genomic variation, namely Single Nucleotide Variants (SNVs) and short Insertions and DELetions (INDELs). DEOGEN performs at least on par with other state of the art methods in this field on different datasets. The method was then extended with additional contextual data and is now available as DEOGEN2 via a web server, which visualizes the predicted results for all variants in most human proteins through an interactive interface targeted to both bioinformaticians and clinicians.},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
}
Rapid technological advances are providing unprecedented insights in the biological sciences, with massive amounts of data generated on genomic and protein sequences. These data continue to grow exponentially, and they are extremely valuable for computational tools where the effect of genomic variants on human health is predicted. State of the art tools in this field give varying results and only tend to agree in the case of single variants that are strongly correlated to disease. The aim of this work is to increase the reliability of these methods, as well as our understanding of the underlying biological mechanisms that lead to disease. We first developed machine learning (ML) based structural bioinformatics predictors that are able to predict molecular features of proteins from the sequence alone. We then used these tools for in silico analysis of the molecular effects of known variants on the affected proteins, and integrated these data with other heterogenous sources of information, such as the essentiality of a gene, that put the variants into their broader biological context. With this information we created DEOGEN, a novel predictor in this field, which is able to deal with the two most common forms of genomic variation, namely Single Nucleotide Variants (SNVs) and short Insertions and DELetions (INDELs). DEOGEN performs at least on par with other state of the art methods in this field on different datasets. The method was then extended with additional contextual data and is now available as DEOGEN2 via a web server, which visualizes the predicted results for all variants in most human proteins through an interactive interface targeted to both bioinformaticians and clinicians. |
Amghar, Mohamed Multiscale local polynomial transforms in smoothing and density estimation PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Mohamed Amghar (2017). NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository handle in url and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/262040,
  author    = {Amghar, Mohamed},
  title     = {Multiscale local polynomial transforms in smoothing and density estimation},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/262040},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
} |
Raimondi, Daniele The effect of genome variation on human proteins: understanding variants and improving their deleteriousness prediction through extensive contextualisation PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Daniele Raimondi (2017), handle-URL variant without abstract. NOTE(review): the same author, title and year also appear under keys info:hdl:2013/251313 and info:hdl:2013/251313b in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository handle in url and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/251313c,
  author    = {Raimondi, Daniele},
  title     = {The effect of genome variation on human proteins: understanding variants and improving their deleteriousness prediction through extensive contextualisation},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/251313},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
} |
Brown, David Norman Application of phylogenetic inference methods to quantify intra-tumour heterogeneity and evolution of breast cancers PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for David Norman Brown (2017), handle-URL variant without abstract. NOTE(review): the same author, title and year also appear under key info:hdl:2013/260251 in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository handle in url and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/260251b,
  author    = {Brown, David Norman},
  title     = {Application of phylogenetic inference methods to quantify intra-tumour heterogeneity and evolution of breast cancers},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/260251},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
} |
Devooght, Robin Similarity measures on graphs and novel methods for collaborative filtering PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Robin Devooght (2017). NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository handle in url and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/296124,
  author    = {Devooght, Robin},
  title     = {Similarity measures on graphs and novel methods for collaborative filtering},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/296124},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
} |
Raimondi, Daniele The effect of genome variation on human proteins: understanding variants and improving their deleteriousness prediction through extensive contextualisation PhD Thesis 2017, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Daniele Raimondi (2017). The abstract carried line-wrap damage from its source (words fused across line ends, stray hyphenation such as "com-putational"); spacing is restored and one doubled word removed, wording otherwise untouched. NOTE(review): the same author, title and year also appear under keys info:hdl:2013/251313b and info:hdl:2013/251313c in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository URL and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/251313,
  author    = {Raimondi, Daniele},
  title     = {The effect of genome variation on human proteins: understanding variants and improving their deleteriousness prediction through extensive contextualisation},
  school    = {Université libre de Bruxelles},
  year      = {2017},
  date      = {2017-01-01},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/251313/5/ContratDiRaimondi.pdf},
  abstract  = {Rapid technological advances are providing unprecedented insights in the biological sciences, with massive amounts of data generated on genomic and protein sequences. These data continue to grow exponentially, and they are extremely valuable for computational tools where the effect of genomic variants on human health is predicted. State of the art tools in this field give varying results and only tend to agree in the case of single variants that are strongly correlated to disease. The aim of this work is to increase the reliability of these methods, as well as our understanding of the underlying biological mechanisms that lead to disease. We first developed machine learning (ML) based structural bioinformatics predictors that are able to predict molecular features of proteins from the sequence alone. We then used these tools for in silico analysis of the molecular effects of known variants on the affected proteins, and integrated these data with other heterogenous sources of information, such as the essentiality of a gene, that put the variants into their broader biological context. With this information we created DEOGEN, a novel predictor in this field, which is able to deal with the two most common forms of genomic variation, namely Single Nucleotide Variants (SNVs) and short Insertions and DELetions (INDELs). DEOGEN performs at least on par with other state of the art methods in this field on different datasets. The method was then extended with additional contextual data and is now available as DEOGEN2 via a web server, which visualizes the predicted results for all variants in most human proteins through an interactive interface targeted to both bioinformaticians and clinicians.},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
}
Rapid technological advances are providing unprecedented insights in the biological sciences, with massive amounts of data generated on genomic and protein sequences. These data continue to grow exponentially, and they are extremely valuable for computational tools where the effect of genomic variants on human health is predicted. State of the art tools in this field give varying results and only tend to agree in the case of single variants that are strongly correlated to disease. The aim of this work is to increase the reliability of these methods, as well as our understanding of the underlying biological mechanisms that lead to disease. We first developed machine learning (ML) based structural bioinformatics predictors that are able to predict molecular features of proteins from the sequence alone. We then used these tools for in silico analysis of the molecular effects of known variants on the affected proteins, and integrated these data with other heterogenous sources of information, such as the essentiality of a gene, that put the variants into their broader biological context. With this information we created DEOGEN, a novel predictor in this field, which is able to deal with the two most common forms of genomic variation, namely Single Nucleotide Variants (SNVs) and short Insertions and DELetions (INDELs). DEOGEN performs at least on par with other state of the art methods in this field on different datasets. The method was then extended with additional contextual data and is now available as DEOGEN2 via a web server, which visualizes the predicted results for all variants in most human proteins through an interactive interface targeted to both bioinformaticians and clinicians. |
2016 |
Dendievel, Rémi Sequential stopping under different environments of weak information PhD Thesis 2016, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Rémi Dendievel (2016), handle-URL variant without abstract. NOTE(review): the same author, title and year also appear under key info:hdl:2013/239624 in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository handle in url and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/239624b,
  author    = {Dendievel, Rémi},
  title     = {Sequential stopping under different environments of weak information},
  school    = {Université libre de Bruxelles},
  year      = {2016},
  date      = {2016-01-01},
  url       = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/239624},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
} |
Tomás, Gil Da Rocha Gene Expression Markers of Proliferation and Differentiation in Cancer & The Extent of Prognostic Signals in the Cancer Transcriptome PhD Thesis 2016, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Gil Da Rocha Tomás (2016). The ampersand in title is now escaped, since a bare one is a LaTeX alignment character. The author is given in comma form so the surname split matches the listing line above. The abstract carried line-wrap damage (words fused across line ends) and accent escapes that had lost their backslashes; spacing and accents are restored, the wording itself is untouched. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository URL and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/235915,
  author    = {Tomás, Gil Da Rocha},
  title     = {Gene Expression Markers of Proliferation and Differentiation in Cancer \& The Extent of Prognostic Signals in the Cancer Transcriptome},
  school    = {Université libre de Bruxelles},
  year      = {2016},
  date      = {2016-01-01},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/235915/5/ContratGilDaRochaTomas.pdf},
  abstract  = {Le cancer est un groupe de maladies génétiques opérationnellement défini par une prolifération cellulaire incontrôlée, impliquant une défaillance de l'homeostasie de l'organisme. La recherche sur le cancer vise à fournir des outils diagnostics précis et des traitements ajustés pour chacune de ces maladies. La technologie microarray permet la quantification de l'expression de tous les produits de transcription du génome humain et constitue donc un outil pour mieux comprendre la nature polygénique du cancer. La technologie microarray permet à la fois de découvrir de nouvelles classes de cancers et de prédire l'issue de maladie en fonction de profils d'expression préalables. En outre, l'utilisation de signatures d'expression géniques en tant que marqueurs représentatifs de certains processus physiologiques moléculaires permet l'emploi de données microarray pour tester des hypothèses biologiques. Cette dissertation a deux objectifs: (a) établir la mesure dans laquelle des marqueurs d'expression génique de la différenciation et de la prolifération cellulaire peuvent contribuer à la classification des maladies cancéreuses; et (b) d'évaluer l'étendue des signaux pronostiques dans les transcriptomes cancéreux. Nous avons mis au point une méthode objective pour extraire des signatures de différentiation organe-spécifiques à partir de données d'expression génique. Nous avons ensuite démontré qu'une signature génique de différentiation tissu-spécifique est capable de distinguer avec précision entre des sous-types histologiques de difficile classification dans un modèle thyroïdien. Ceci fait preuve du potentiel valeur clinique et diagnostique des signatures de différentiation dans le domaine oncologique. Nous montrons aussi qu'une fraction non négligeable des transcriptomes cancéreux est capable de prédire l'issue des respectives maladies, à la suite d'une analyse systématique de 114 cohortes de profiles d'expression cancéreux englobant 19 types de cancers différents. Cet observation est probablement liée à une vaste structure de corrélation parmis les profils d'expression cancéreux, partiellement expliquée par des variables techniques et biologiques. Cette evidence met en cause l'utilisation généralisée d'associations statistiques entre des marqueurs d'expression géniques et les issues de chaque maladie parmis plusieurs patients afin d'en déduire l'implication de mécanismes biologiques particuliers dans la progression du cancer.},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
}
Le cancer est un groupe de maladies génétiques opérationnellement défini par une prolifération cellulaire incontrôlée, impliquant une défaillance de l'homeostasie de l'organisme. La recherche sur le cancer vise à fournir des outils diagnostics précis et des traitements ajustés pour chacune de ces maladies. La technologie microarray permet la quantification de l'expression de tous les produits de transcription du génome humain et constitue donc un outil pour mieux comprendre la nature polygénique du cancer. La technologie microarray permet à la fois de découvrir de nouvelles classes de cancers et de prédire l'issue de maladie en fonction de profils d'expression préalables. En outre, l'utilisation de signatures d'expression géniques en tant que marqueurs représentatifs de certains processus physiologiques moléculaires permet l'emploi de données microarray pour tester des hypothèses biologiques. Cette dissertation a deux objectifs: (a) établir la mesure dans laquelle des marqueurs d'expression génique de la différenciation et de la prolifération cellulaire peuvent contribuer à la classification des maladies cancéreuses; et (b) d'évaluer l'étendue des signaux pronostiques dans les transcriptomes cancéreux. Nous avons mis au point une méthode objective pour extraire des signatures de différentiation organe-spécifiques à partir de données d'expression génique. Nous avons ensuite démontré qu'une signature génique de différentiation tissu-spécifique est capable de distinguer avec précision entre des sous-types histologiques de difficile classification dans un modèle thyroïdien. Ceci fait preuve du potentiel valeur clinique et diagnostique des signatures de différentiation dans le domaine oncologique. Nous montrons aussi qu'une fraction non négligeable des transcriptomes cancéreux est capable de prédire l'issue des respectives maladies, à la suite d'une analyse systématique de 114 cohortes de profiles d'expression cancéreux englobant 19 types de cancers différents. Cet observation est probablement liée à une vaste structure de corrélation parmis les profils d'expression cancéreux, partiellement expliquée par des variables techniques et biologiques. Cette evidence met en cause l'utilisation généralisée d'associations statistiques entre des marqueurs d'expression géniques et les issues de chaque maladie parmis plusieurs patients afin d'en déduire l'implication de mécanismes biologiques particuliers dans la progression du cancer. |
Dendievel, Rémi Sequential stopping under different environments of weak information PhD Thesis 2016, (Funder: Universite Libre de Bruxelles).
@comment{PhD thesis record for Rémi Dendievel (2016). Accent escapes in the abstract had lost their backslashes (a bare backtick or caret before the vowel) and paragraph breaks had been fused without a space; accents are restored as UTF-8, matching the accented characters already present in this field. NOTE(review): the same author, title and year also appear under key info:hdl:2013/239624b in this file -- looks like a duplicate record; confirm before merging. NOTE(review): school is required for phdthesis and was missing; the value is inferred from the ULB repository URL and the funder note -- TODO confirm.}
@phdthesis{info:hdl:2013/239624,
  author    = {Dendievel, Rémi},
  title     = {Sequential stopping under different environments of weak information},
  school    = {Université libre de Bruxelles},
  year      = {2016},
  date      = {2016-01-01},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/239624/5/contratDendievel.pdf},
  abstract  = {Notre thèse s’articule autour du thème de l’utilisation optimale de l’information contenue dans un modèle probabiliste flexible. Dans le premier chapitre, nous couvrons des résultats bien connus des martingales comme le théorème de convergence dit L1 des martingales et le théorème d’arrêt. Nous discutons de problèmes ouverts similaires au «last arrival problem» (Bruss et Yor, 2012) qui sont des vrais défis du point de vue théorique et nous ne pouvons que conjecturer la stratégie optimale. Dans les chapitres suivants, nous résolvons des extensions de problèmes d’arrêt optimal proposés par R. R. Weber (U. Cambridge), basés sur le «théorème des odds» (Bruss, 2000). En résumé, il s’agit d’effectuer une seule action (un seul arrêt) lorsque deux suites d’observations indépendantes sont observées simultanément. Nous donnons la solution à ces problèmes pour un nombre (fixé) choisi de processus. Le chapitre suivant passe en revue la plupart des développements récents (depuis 2000) réalisés autour du «théorème des odds» (Bruss, 2000). Le matériel présenté fut publié (2013), il a donc été mis à jour dans cette thèse pour inclure les derniers résultats depuis cette date. Puis nous réservons un chapitre pour une solution explicite pour un cas particulier du Problème d’arrêt optimal de Robbins. Ce chapitre est basé sur un article publié par l’auteur en collaboration avec le professeur Swan (Université de Liège). Ce chapitre offre une belle illustration des difficultés rencontrées lorsque trop d’information sur les variables est contenue dans le modèle. La solution optimale de ce problème dans le cas général n’est pas connue. Par contre, contre-intuitivement, dans le «last arrival problem» mentionné plus haut, moins d’information permet, comme nous le montrons, de trouver en effet la solution optimale. La thèse contient un dernier chapitre sur un problème de nature plus combinatoire que nous pouvons lier à la théorie des graphes dans une certaine mesure. Nous étudions le processus de création d’un graphe aléatoire particulier et les propriétés des cycles créés par celui-ci. Le problème est séquentiel et permet d’envisager des problèmes d’arrêt intéressants. Cette étude a des conséquences en théorie des graphes, en analyse combinatoire ainsi qu’en science de la chimie combinatoire pour les applications. Un de nos résultats est analogue au résultat de Janson (1987) relatif au premier cycle créé pendant la création de graphes aléatoires.},
  note      = {Funder: Universite Libre de Bruxelles},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
}
Notre thèse s’articule autour du thème de l’utilisation optimale de l’information contenue dans un modèle probabiliste flexible. Dans le premier chapitre, nous couvrons des résultats bien connus des martingales comme le théorème de convergence dit L1 des martingales et le théorème d’arrêt. Nous discutons de problèmes ouverts similaires au «last arrival problem» (Bruss et Yor, 2012) qui sont des vrais défis du point de vue théorique et nous ne pouvons que conjecturer la stratégie optimale. Dans les chapitres suivants, nous résolvons des extensions de problèmes d’arrêt optimal proposés par R. R. Weber (U. Cambridge), basés sur le «théorème des odds» (Bruss, 2000). En résumé, il s’agit d’effectuer une seule action (un seul arrêt) lorsque deux suites d’observations indépendantes sont observées simultanément. Nous donnons la solution à ces problèmes pour un nombre (fixé) choisi de processus. Le chapitre suivant passe en revue la plupart des développements récents (depuis 2000) réalisés autour du «théorème des odds» (Bruss, 2000). Le matériel présenté fut publié (2013), il a donc été mis à jour dans cette thèse pour inclure les derniers résultats depuis cette date. Puis nous réservons un chapitre pour une solution explicite pour un cas particulier du Problème d’arrêt optimal de Robbins. Ce chapitre est basé sur un article publié par l’auteur en collaboration avec le professeur Swan (Université de Liège). Ce chapitre offre une belle illustration des difficultés rencontrées lorsque trop d’information sur les variables est contenue dans le modèle. La solution optimale de ce problème dans le cas général n’est pas connue. Par contre, contre-intuitivement, dans le «last arrival problem» mentionné plus haut, moins d’information permet, comme nous le montrons, de trouver en effet la solution optimale. La thèse contient un dernier chapitre sur un problème de nature plus combinatoire que nous pouvons lier à la théorie des graphes dans une certaine mesure. Nous étudions le processus de création d’un graphe aléatoire particulier et les propriétés des cycles créés par celui-ci. Le problème est séquentiel et permet d’envisager des problèmes d’arrêt intéressants. Cette étude a des conséquences en théorie des graphes, en analyse combinatoire ainsi qu’en science de la chimie combinatoire pour les applications. Un de nos résultats est analogue au résultat de Janson (1987) relatif au premier cycle créé pendant la création de graphes aléatoires. |
Zisis, Ioannis The Effect of Group Formation on Behaviour: An Experimental and Evolutionary Analysis PhD Thesis 2016, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/231974b, title = {The Effect of Group Formation on Behaviour: An Experimental and Evolutionary Analysis}, author = {Ioannis Zisis}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/231974/5/contratZisis.pdf}, year = {2016}, date = {2016-01-01}, abstract = {The division of resources between a group of people may cause conflicts: Individuals with varying roles and responsibilities will claim different shares of the surplus to be divided. In this dissertation, we analyze how the decision to form a group will influence the bargaining behaviour of the members of that group. People will act collectively as certain tasks may require the participation of a specific number of individuals before it can be completed. We examine whether certain mechanisms can efficiently promote group formation for the sake of surplus production, and then, what will be the effect of these mechanisms on the behaviour of the group members. For these reasons, we constructed a novel surplus production and distribution interaction which we call the Anticipation Game (AG). The AG can be played between only two players (pairwise interaction) or among more than two players (group interaction). In our study we will analyze both the pairwise AG and the group version of AG, first by obtaining our own empirical data and then by performing a stochastic evolutionary analysis. 
We aim to provide answers on: i) how will a reputation based partner approval mechanism influence the surplus distribution in both the pairwise and the group AG, ii) will then limitations in obtaining the reputation of a potential partner alter the results of the pairwise AG?, iii) will we notice any effect on the behaviour of players when they can repeatedly cooperate with the same partners in group interactions, iv) how natural selection may have shaped the behaviour of players in group formation interactions (both pairwise and group AG evolutionary analysis).}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The division of resources between a group of people may cause conflicts: Individuals with varying roles and responsibilities will claim different shares of the surplus to be divided. In this dissertation, we analyze how the decision to form a group will influence the bargaining behaviour of the members of that group. People will act collectively as certain tasks may require the participation of a specific number of individuals before it can be completed. We examine whether certain mechanisms can efficiently promote group formation for the sake of surplus production, and then, what will be the effect of these mechanisms on the behaviour of the group members. For these reasons, we constructed a novel surplus production and distribution interaction which we call the Anticipation Game (AG). The AG can be played between only two players (pairwise interaction) or among more than two players (group interaction). In our study we will analyze both the pairwise AG and the group version of AG, first by obtaining our own empirical data and then by performing a stochastic evolutionary analysis. 
We aim to provide answers on: i) how will a reputation based partner approval mechanism influence the surplus distribution in both the pairwise and the group AG, ii) will then limitations in obtaining the reputation of a potential partner alter the results of the pairwise AG?, iii) will we notice any effect on the behaviour of players when they can repeatedly cooperate with the same partners in group interactions, iv) how natural selection may have shaped the behaviour of players in group formation interactions (both pairwise and group AG evolutionary analysis). |
Zisis, Ioannis The Effect of Group Formation on Behaviour: An Experimental and Evolutionary Analysis PhD Thesis 2016, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/231974c, title = {The Effect of Group Formation on Behaviour: An Experimental and Evolutionary Analysis}, author = {Ioannis Zisis}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/231974}, year = {2016}, date = {2016-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Tomás, Gil Da Rocha Gene Expression Markers of Proliferation and Differentiation in Cancer & The Extent of Prognostic Signals in the Cancer Transcriptome PhD Thesis 2016, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/235915b, title = {Gene Expression Markers of Proliferation and Differentiation in Cancer \& The Extent of Prognostic Signals in the Cancer Transcriptome}, author = {Gil Da Rocha Tomás}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/235915}, year = {2016}, date = {2016-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Tarabichi, Maxime Integrative analyses of genome-wide transcriptomic and genomic thyroid cancer profiles PhD Thesis 2016, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/225138b, title = {Integrative analyses of genome-wide transcriptomic and genomic thyroid cancer profiles}, author = {Maxime Tarabichi}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/225138}, year = {2016}, date = {2016-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Zisis, Ioannis The Effect of Group Formation on Behaviour: An Experimental and Evolutionary Analysis PhD Thesis 2016, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/231974, title = {The Effect of Group Formation on Behaviour: An Experimental and Evolutionary Analysis}, author = {Ioannis Zisis}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/231974/5/contratZisis.pdf}, year = {2016}, date = {2016-01-01}, abstract = {The division of resources between a group of people may cause conflicts: Individuals with varying roles and responsibilities will claim different shares of the surplus to be divided. In this dissertation, we analyze how the decision to form a group will influence the bargaining behaviour of the members of that group. People will act collectively as certain tasks may require the participation of a specific number of individuals before it can be completed. We examine whether certain mechanisms can efficiently promote group formation for the sake of surplus production, and then, what will be the effect of these mechanisms on the behaviour of the group members. For these reasons, we constructed a novel surplus production and distribution interaction which we call the Anticipation Game (AG). The AG can be played between only two players (pairwise interaction) or among more than two players (group interaction). In our study we will analyze both the pairwise AG and the group version of AG, first by obtaining our own empirical data and then by performing a stochastic evolutionary analysis. 
We aim to provide answers on: i) how will a reputation based partner approval mechanism influence the surplus distribution in both the pairwise and the group AG, ii) will then limitations in obtaining the reputation of a potential partner alter the results of the pairwise AG?, iii) will we notice any effect on the behaviour of players when they can repeatedly cooperate with the same partners in group interactions, iv) how natural selection may have shaped the behaviour of players in group formation interactions (both pairwise and group AG evolutionary analysis).}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The division of resources between a group of people may cause conflicts: Individuals with varying roles and responsibilities will claim different shares of the surplus to be divided. In this dissertation, we analyze how the decision to form a group will influence the bargaining behaviour of the members of that group. People will act collectively as certain tasks may require the participation of a specific number of individuals before it can be completed. We examine whether certain mechanisms can efficiently promote group formation for the sake of surplus production, and then, what will be the effect of these mechanisms on the behaviour of the group members. For these reasons, we constructed a novel surplus production and distribution interaction which we call the Anticipation Game (AG). The AG can be played between only two players (pairwise interaction) or among more than two players (group interaction). In our study we will analyze both the pairwise AG and the group version of AG, first by obtaining our own empirical data and then by performing a stochastic evolutionary analysis. 
We aim to provide answers on: i) how will a reputation based partner approval mechanism influence the surplus distribution in both the pairwise and the group AG, ii) will then limitations in obtaining the reputation of a potential partner alter the results of the pairwise AG?, iii) will we notice any effect on the behaviour of players when they can repeatedly cooperate with the same partners in group interactions, iv) how natural selection may have shaped the behaviour of players in group formation interactions (both pairwise and group AG evolutionary analysis). |
Tarabichi, Maxime Integrative analyses of genome-wide transcriptomic and genomic thyroid cancer profiles PhD Thesis 2016, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/225138, title = {Integrative analyses of genome-wide transcriptomic and genomic thyroid cancer profiles}, author = {Maxime Tarabichi}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/225138/5/ContratMaximeTarabichi.pdf}, year = {2016}, date = {2016-01-01}, abstract = {Cette thèse en bioinformatique a été réalisée entre 2010 et 2015 dans le groupe du Pr. Vincent Detours à l’Institut de Recherche Interdisciplinaire en Biologie Humaine et Moléculaire. Nous avons analysé des données génomiques et transcriptomiques provenant de carcinomes papillaires de la thyroïde (CPTs) et leurs tissus non-cancéreux adjacents. La première partie étudiait les différences transcriptomiques entre CPTs post-Tchernobyl et CPTs sporadiques, et leurs tissus non-cancéreux adjacents. Dans notre cohorte, les cas sporadiques étaient en moyenne et significativement un an plus jeunes. Après un ajustement des données transcriptionnelles pour l'âge, près de 400 gènes étaient plus exprimés dans les tissus adjacents des patients exposés aux radiations. Cependant, nous n’avons pu détecter aucune surreprésentation de groupe de gènes participant à des fonctions biologiques connues. Il était possible de distinguer les cas sporadiques des cas post-Tchernobyl sur base des transcriptomes de leurs tissus adjacents, avec une précision de ~70%. Cette surexpression de gènes dans les tissus non-cancéreux adjacents pourrait être liée à une radiosensibilité accrue dans le groupe des patients exposés aux radiations de Tchernobyl. Dans la deuxième étude, nous avons intégré des données provenant des patients de la première partie, incluant les nombres de copies d'ADN des CPTs, le génotype de plus de 400.000 SNPs dans le sang et les données transcriptionnelles des CPTs et leurs tissus non-cancéreux adjacents. 
En reproduisant les résultats d'une étude précédente, nous avons retrouvé la région 7q11.23 dupliquée exclusivement dans un tiers des patients exposés aux radiations. Dans une étude indépendante, un autre groupe a montré que la duplication de cette région était plus fréquente dans une population de lignées cellulaires radiosensibles que dans la population humaine normale. Cependant, en analysant les transcriptomes des patients présentant cette duplication, nous n'avons pas détecté de différence d’expression des gènes codés dans cette région génomique. En outre, aucun génotype de SNP n'était significativement lié à l'exposition aux radiations. En conclusion, les résultats confirment qu'un tiers des CPTs post-Tchernobyl ont des traces d'un dégât radio-sensibilisant dans leur ADN. Dans une troisième étude, nous avons étudié les différences transcriptionnelles entre CPTs et leurs métastases ganglionnaires (MGs) associées, ainsi qu'entre des CPTs développant des MGs (N+) et des CPTs ne développant pas de MGs (N0). Des études précédentes comparant les MGs et leurs tumeurs associées impliquant d’autres organes ont montré une surexpression de gènes dans les MGs, liés aux cellules immunitaires. Ce signal provient du tissu contaminant environnant les MGs. Pour se défaire de ce signal contaminant, d’autres études ont microdisséqué au laser les parties tumorales des MGs. Cependant, la microdissection retire aussi le stroma associé à la tumeur, alors que celui-ci est justement impliqué dans la progression tumorale. Grâce à une méthode originale, nous avons corrigé nos données d’expression des MGs pour leur contenu en contaminant ganglionnaire non-cancéreux. Après cette correction, l’expression de gènes liés au stroma était plus élevée dans les MGs que dans leurs CPTs. Les différences d’expression entre N0 et N+ n’étaient pas reproductibles entre 4 jeux de données indépendants de CPTs. 
Ceci démontre l’absence d’un signal transcriptionnel lié au statut nodal dans ces données. Cependant, en utilisant des données publiques comprenant des centaines de tumeurs, il est possible de prédire le statut nodal (N0 ou N+) des CPTs ainsi que des cancers du sein et du colon à partir de leurs transcriptomes. Des études précédentes montraient des taux de prédiction presque parfaits (>90%) du statut nodal à partir des données transcriptomiques. Nous avons décelé dans ces études le même biais technique de sélection des gènes, qui peut expliquer ces taux artificiellement élevés. Dans notre étude, ce biais n’était pas présent et la précision de nos prédictions était limitée (<70%), questionnant l’intérêt clinique de telles prédictions. La présence d’un signal permettant de prédire le statut nodal et l’irreproductibilité de ce signal dans des jeux de données indépendants peuvent s'expliquer par l’association entre le statut nodal et des caractéristiques d'agressivité des tumeurs, qui pourraient, elles, avoir une influence reproductible sur les transcriptomes. Dans notre dernière étude, nous avons analysé les différences entre CPTs, liées à la présence de BRAFV600E, une mutation commune à 60% des CPTs. En utilisant un jeu de données public, nous avons montré que les CPTs présentant la mutation étaient plus dédifférenciés, et plus infiltrés en stroma, probablement en lymphocytes et fibroblastes; et que ces CPTs présentaient plus de fibrose et proliféraient sans doute plus. Tout ceci suggère que les CPTs mutés pour BRAF constituent un groupe de CPTs plus agressif. Des caractéristiques d’agressivité pourraient être détectées au front invasif, c’est-à-dire la périphérie de la tumeur définissant son contact avec le stroma, notamment la présence de regroupement de cellules isolées du reste de la tumeur. 
Dans les CPTs, ces îlots cellulaires isolés sont observés sur des lames histologiques 2D et pourraient être expliqués soit par un détachement cellulaire, signe d’agressivité lié au processus métastatique, soit une conformation complexe compatible avec une tumeur connexe en 3D. Dans un CPT, nous avons analysé la conformation 3D du front invasif d'un CPT muté. Nous avons reconstruit son volume 3D grâce à une méthode originale. Les groupes de cellules cancéreuses qui semblaient isolées sur les images 2D d’histopathologie, étaient en fait connectés en 3D. L’hypothèse de la présence de détachement cellulaire suite à la transition épithélio-mésenchymateuse n’est donc pas requise pour expliquer la présence de ces îlots cellulaires en 2D. La forme 3D du front invasif impliquait une surface de contact entre tumeur et stroma bien plus importante qu'impliquée par la forme ellipsoïde habituellement décrite. Les fibroblastes participaient autant à la création de la masse tumorale que les cellules cancéreuses, puisque ces deux groupes de cellules proliféraient à la même vitesse. A l'avenir, le séquençage du matériel génétique de cellules individuelles facilitera notre interprétation des signaux génomiques et transcriptomiques, qui jusqu’alors provenaient de tissu complet, i.e. un mélange de populations de cellules tumorales, stromales et de contaminant. Une signature de radiation pourrait être extraite des profils mutationnels de cellules individuelles exposées aux radiations et à l’H2O2 in vitro et comparée à la signature des CPTs post-Tchernobyl. Les cellules tumorales et stromales individuelles des MGs pourraient être comparées aux cellules tumorales et stromales individuelles des CPTs. 
De même les cellules individuelles mutées pour BRAFV600E pourraient être comparées aux cellules non mutées.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Cette thèse en bioinformatique a été réalisée entre 2010 et 2015 dans le groupe du Pr. Vincent Detours à l’Institut de Recherche Interdisciplinaire en Biologie Humaine et Moléculaire. Nous avons analysé des données génomiques et transcriptomiques provenant de carcinomes papillaires de la thyroïde (CPTs) et leurs tissus non-cancéreux adjacents. La première partie étudiait les différences transcriptomiques entre CPTs post-Tchernobyl et CPTs sporadiques, et leurs tissus non-cancéreux adjacents. Dans notre cohorte, les cas sporadiques étaient en moyenne et significativement un an plus jeunes. Après un ajustement des données transcriptionnelles pour l'âge, près de 400 gènes étaient plus exprimés dans les tissus adjacents des patients exposés aux radiations. Cependant, nous n’avons pu détecter aucune surreprésentation de groupe de gènes participant à des fonctions biologiques connues. Il était possible de distinguer les cas sporadiques des cas post-Tchernobyl sur base des transcriptomes de leurs tissus adjacents, avec une précision de ~70%. Cette surexpression de gènes dans les tissus non-cancéreux adjacents pourrait être liée à une radiosensibilité accrue dans le groupe des patients exposés aux radiations de Tchernobyl. Dans la deuxième étude, nous avons intégré des données provenant des patients de la première partie, incluant les nombres de copies d'ADN des CPTs, le génotype de plus de 400.000 SNPs dans le sang et les données transcriptionnelles des CPTs et leurs tissus non-cancéreux adjacents. En reproduisant les résultats d'une étude précédente, nous avons retrouvé la région 7q11.23 dupliquée exclusivement dans un tiers des patients exposés aux radiations. 
Dans une étude indépendante, un autre groupe a montré que la duplication de cette région était plus fréquente dans une population de lignées cellulaires radiosensibles que dans la population humaine normale. Cependant, en analysant les transcriptomes des patients présentant cette duplication, nous n'avons pas détecté de différence d’expression des gènes codés dans cette région génomique. En outre, aucun génotype de SNP n'était significativement lié à l'exposition aux radiations. En conclusion, les résultats confirment qu'un tiers des CPTs post-Tchernobyl ont des traces d'un dégât radio-sensibilisant dans leur ADN. Dans une troisième étude, nous avons étudié les différences transcriptionnelles entre CPTs et leurs métastases ganglionnaires (MGs) associées, ainsi qu'entre des CPTs développant des MGs (N+) et des CPTs ne développant pas de MGs (N0). Des études précédentes comparant les MGs et leurs tumeurs associées impliquant d’autres organes ont montré une surexpression de gènes dans les MGs, liés aux cellules immunitaires. Ce signal provient du tissu contaminant environnant les MGs. Pour se défaire de ce signal contaminant, d’autres études ont microdisséqué au laser les parties tumorales des MGs. Cependant, la microdissection retire aussi le stroma associé à la tumeur, alors que celui-ci est justement impliqué dans la progression tumorale. Grâce à une méthode originale, nous avons corrigé nos données d’expression des MGs pour leur contenu en contaminant ganglionnaire non-cancéreux. Après cette correction, l’expression de gènes liés au stroma était plus élevée dans les MGs que dans leurs CPTs. Les différences d’expression entre N0 et N+ n’étaient pas reproductibles entre 4 jeux de données indépendants de CPTs. Ceci démontre l’absence d’un signal transcriptionnel lié au statut nodal dans ces données. 
Cependant, en utilisant des données publiques comprenant des centaines de tumeurs, il est possible de prédire le statut nodal (N0 ou N+) des CPTs ainsi que des cancers du sein et du colon à partir de leurs transcriptomes. Des études précédentes montraient des taux de prédiction presque parfaits (>90%) du statut nodal à partir des données transcriptomiques. Nous avons décelé dans ces études le même biais technique de sélection des gènes, qui peut expliquer ces taux artificiellement élevés. Dans notre étude, ce biais n’était pas présent et la précision de nos prédictions était limitée (<70%), questionnant l’intérêt clinique de telles prédictions. La présence d’un signal permettant de prédire le statut nodal et l’irreproductibilité de ce signal dans des jeux de données indépendants peuvent s'expliquer par l’association entre le statut nodal et des caractéristiques d'agressivité des tumeurs, qui pourraient, elles, avoir une influence reproductible sur les transcriptomes. Dans notre dernière étude, nous avons analysé les différences entre CPTs, liées à la présence de BRAFV600E, une mutation commune à 60% des CPTs. En utilisant un jeu de données public, nous avons montré que les CPTs présentant la mutation étaient plus dédifférenciés, et plus infiltrés en stroma, probablement en lymphocytes et fibroblastes; et que ces CPTs présentaient plus de fibrose et proliféraient sans doute plus. Tout ceci suggère que les CPTs mutés pour BRAF constituent un groupe de CPTs plus agressif. Des caractéristiques d’agressivité pourraient être détectées au front invasif, c’est-à-dire la périphérie de la tumeur définissant son contact avec le stroma, notamment la présence de regroupement de cellules isolées du reste de la tumeur. 
Dans les CPTs, ces îlots cellulaires isolés sont observés sur des lames histologiques 2D et pourraient être expliqués soit par un détachement cellulaire, signe d’agressivité lié au processus métastatique, soit une conformation complexe compatible avec une tumeur connexe en 3D. Dans un CPT, nous avons analysé la conformation 3D du front invasif d'un CPT muté. Nous avons reconstruit son volume 3D grâce à une méthode originale. Les groupes de cellules cancéreuses qui semblaient isolées sur les images 2D d’histopathologie, étaient en fait connectés en 3D. L’hypothèse de la présence de détachement cellulaire suite à la transition épithélio-mésenchymateuse n’est donc pas requise pour expliquer la présence de ces îlots cellulaires en 2D. La forme 3D du front invasif impliquait une surface de contact entre tumeur et stroma bien plus importante qu'impliquée par la forme ellipsoïde habituellement décrite. Les fibroblastes participaient autant à la création de la masse tumorale que les cellules cancéreuses, puisque ces deux groupes de cellules proliféraient à la même vitesse. A l'avenir, le séquençage du matériel génétique de cellules individuelles facilitera notre interprétation des signaux génomiques et transcriptomiques, qui jusqu’alors provenaient de tissu complet, i.e. un mélange de populations de cellules tumorales, stromales et de contaminant. Une signature de radiation pourrait être extraite des profils mutationnels de cellules individuelles exposées aux radiations et à l’H2O2 in vitro et comparée à la signature des CPTs post-Tchernobyl. Les cellules tumorales et stromales individuelles des MGs pourraient être comparées aux cellules tumorales et stromales individuelles des CPTs. De même les cellules individuelles mutées pour BRAFV600E pourraient être comparées aux cellules non mutées. |
2015 |
Lopes, Miguel Inference of gene networks from time series expression data and application to type 1 Diabetes PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/216729, title = {Inference of gene networks from time series expression data and application to type 1 Diabetes}, author = {Miguel Lopes}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/216729/6/contratGasparLopes.pdf}, year = {2015}, date = {2015-01-01}, abstract = {The inference of gene regulatory networks (GRN) is of great importance to medical research, as causal mechanisms responsible for phenotypes are unravelled and potential therapeutical targets identified. In type 1 diabetes, insulin producing pancreatic beta-cells are the target of an auto-immune attack leading to apoptosis (cell suicide). Although key genes and regulations have been identified, a precise characterization of the process leading to beta-cell apoptosis has not been achieved yet. The inference of relevant molecular pathways in type 1 diabetes is then a crucial research topic. GRN inference from gene expression data (obtained from microarrays and RNA-seq technology) is a causal inference problem which may be tackled with well-established statistical and machine learning concepts. In particular, the use of time series facilitates the identification of the causal direction in cause-effect gene pairs. However, inference from gene expression data is a very challenging problem due to the large number of existing genes (in human, over twenty thousand) and the typical low number of samples in gene expression datasets. In this context, it is important to correctly assess the accuracy of network inference methods. The contributions of this thesis are on three distinct aspects. The first is on inference assessment using precision-recall curves, in particular using the area under the curve (AUPRC). 
The typical approach to assess AUPRC significance is using Monte Carlo, and a parametric alternative is proposed. It consists in deriving the mean and variance of the null AUPRC and then using these parameters to fit a beta distribution approximating the true distribution. The second contribution is an investigation on network inference from time series. Several state of the art strategies are experimentally assessed and novel heuristics are proposed. One is a fast approximation of first order Granger causality scores, suited for GRN inference in the large variable case. Another identifies co-regulated genes (i.e. regulated by the same genes). Both are experimentally validated using microarray and simulated time series. The third contribution of this thesis is on the context of type 1 diabetes and is a study on beta cell gene expression after exposure to cytokines, emulating the mechanisms leading to apoptosis. 8 datasets of beta cell gene expression were used to identify differentially expressed genes before and after 24h, which were functionally characterized using bioinformatics tools. The two most differentially expressed genes, previously unknown in the type 1 Diabetes literature (RIPK2 and ELF3) were found to modulate cytokine induced apoptosis. A regulatory network was then inferred using a dynamic adaptation of a state of the art network inference method. Three out of four predicted regulations (involving RIPK2 and ELF3) were experimentally confirmed, providing a proof of concept for the adopted approach.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The inference of gene regulatory networks (GRN) is of great importance to medical research, as causal mechanisms responsible for phenotypes are unravelled and potential therapeutical targets identified. In type 1 diabetes, insulin producing pancreatic beta-cells are the target of an auto-immune attack leading to apoptosis (cell suicide). 
Although key genes and regulations have been identified, a precise characterization of the process leading to beta-cell apoptosis has not been achieved yet. The inference of relevant molecular pathways in type 1 diabetes is then a crucial research topic. GRN inference from gene expression data (obtained from microarrays and RNA-seq technology) is a causal inference problem which may be tackled with well-established statistical and machine learning concepts. In particular, the use of time series facilitates the identification of the causal direction in cause-effect gene pairs. However, inference from gene expression data is a very challenging problem due to the large number of existing genes (in human, over twenty thousand) and the typical low number of samples in gene expression datasets. In this context, it is important to correctly assess the accuracy of network inference methods. The contributions of this thesis are on three distinct aspects. The first is on inference assessment using precision-recall curves, in particular using the area under the curve (AUPRC). The typical approach to assess AUPRC significance is using Monte Carlo, and a parametric alternative is proposed. It consists in deriving the mean and variance of the null AUPRC and then using these parameters to fit a beta distribution approximating the true distribution. The second contribution is an investigation on network inference from time series. Several state of the art strategies are experimentally assessed and novel heuristics are proposed. One is a fast approximation of first order Granger causality scores, suited for GRN inference in the large variable case. Another identifies co-regulated genes (i.e. regulated by the same genes). Both are experimentally validated using microarray and simulated time series. 
The third contribution of this thesis is on the context of type 1 diabetes and is a study on beta cell gene expression after exposure to cytokines, emulating the mechanisms leading to apoptosis. 8 datasets of beta cell gene expression were used to identify differentially expressed genes before and after 24h, which were functionally characterized using bioinformatics tools. The two most differentially expressed genes, previously unknown in the type 1 Diabetes literature (RIPK2 and ELF3) were found to modulate cytokine induced apoptosis. A regulatory network was then inferred using a dynamic adaptation of a state of the art network inference method. Three out of four predicted regulations (involving RIPK2 and ELF3) were experimentally confirmed, providing a proof of concept for the adopted approach. |
Hajingabo, Leon Analyzing molecular network perturbations in human cancer: application to mutated genes and gene fusions involved in acute lymphoblastic leukemia PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/209126, title = {Analyzing molecular network perturbations in human cancer: application to mutated genes and gene fusions involved in acute lymphoblastic leukemia}, author = {Leon Hajingabo}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/209126/3/d6d6225e-dfa8-46a3-985a-2ef19c71eff1.txt}, year = {2015}, date = {2015-01-01}, abstract = {Le séquençage du génome humain et l'émergence de nouvelles technologies de génomique à haut débit, ont initié de nouveaux modèles d'investigation pour l'analyse systématique des maladies humaines. Actuellement, nous pouvons tenter de comprendre les maladies telles que le cancer avec une perspective plus globale, en identifiant des gènes responsables des cancers et en étudiant la manière dont leurs produits protéiques fonctionnent dans un réseau d’interactions moléculaires. Dans ce contexte, nous avons collecté les gènes spécifiquement liés à la Leucémie Lymphoblastique Aiguë (LLA), et identifié de nouveaux partenaires d'interaction qui relient ces gènes clés associés à la LLA tels que NOTCH1, FBW7, KRAS et PTPN11, dans un réseau d’interactions. Nous avons également tenté de prédire l’impact fonctionnel des variations génomiques telles que des fusions de gènes impliquées dans LLA. En utilisant comme modèles trois différentes translocations chromosomiques ETV6-RUNX1 (TEL-AML1), BCR-ABL1, et E2A-PBX1 (TCF3-PBX1) fréquemment identifiées dans des cellules B LLA, nous avons adapté une approche de prédiction d’oncogènes afin de prédire des perturbations moléculaires dans la LLA. Nous avons montré que les circuits transcriptomiques dépendant de Myc et JunD sont spécifiquement dérégulés suite aux fusions de gènes TEL-AML1 et TCF3-PBX1, respectivement. Nous avons également identifié le mécanisme de transport des ARNm dépendant du facteur NXF1 comme une cible directe de la protéine de fusion TCF3-PBX1. 
Grâce à cette approche combinant les données interactomiques et les analyses d'expression génique, nous avons fourni un nouvel aperçu à la compréhension moléculaire de la Leucémie Lymphoblastique Aiguë.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Le séquençage du génome humain et l'émergence de nouvelles technologies de génomique à haut débit, ont initié de nouveaux modèles d'investigation pour l'analyse systématique des maladies humaines. Actuellement, nous pouvons tenter de comprendre les maladies tel que le cancer avec une perspective plus globale, en identifiant des gènes responsables des cancers et en étudiant la manière dont leurs produits protéiques fonctionnent dans un réseau d’interactions moléculaires. Dans ce contexte, nous avons collecté les gènes spécifiquement liés à la Leucémie Lymphoblastique Aiguë (LLA), et identifié de nouveaux partenaires d'interaction qui relient ces gènes clés associés à la LLA tels que NOTCH1, FBW7, KRAS et PTPN11, dans un réseau d’interactions. Nous avons également tenté de prédire l’impact fonctionnel des variations génomiques tel que des fusions de gènes impliquées dans LLA. En utilisant comme modèles trois différentes translocations chromosomiques ETV6-RUNX1 (TEL-AML1), BCR-ABL1, et E2A-PBX1 (TCF3-PBX1) fréquemment identifiées dans des cellules B LLA, nous avons adapté une approche de prédiction d’oncogènes afin de prédire des perturbations moléculaires dans la LLA. Nous avons montré que les circuits transcriptomiques dépendant de Myc et JunD sont spécifiquement dérégulés suite aux fusions de gènes TEL-AML1 et TCF3-PBX1, respectivement. Nous avons également identifié le mécanisme de transport des ARNm dépendant du facteur NXF1 comme une cible directe de la protéine de fusion TCF3-PBX1. 
Grâce à cette approche combinant les données interactomiques et les analyses d'expression génique, nous avons fourni un nouvel aperçu à la compréhension moléculaire de la Leucémie Lymphoblastique Aiguë. |
Dendievel, Sarah Skip-free markov processes: analysis of regular perturbations PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/209050, title = {Skip-free markov processes: analysis of regular perturbations}, author = {Sarah Dendievel}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/209050}, year = {2015}, date = {2015-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Dal Pozzolo, Andrea Adaptive Machine Learning for Credit Card Fraud Detection PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/221654b, title = {Adaptive Machine Learning for Credit Card Fraud Detection}, author = {Dal Pozzolo, Andrea}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/221654/5/contratDalPozzolo.pdf}, year = {2015}, date = {2015-01-01}, abstract = {Billions of dollars of loss are caused every year by fraudulent credit card transactions. The design of efficient fraud detection algorithms is key for reducing these losses, and more and more algorithms rely on advanced machine learning techniques to assist fraud investigators. The design of fraud detection algorithms is however particularly challenging due to the non-stationary distribution of the data, the highly unbalanced classes distributions and the availability of few transactions labeled by fraud investigators. At the same time public data are scarcely available for confidentiality issues, leaving unanswered many questions about what is the best strategy. In this thesis we aim to provide some answers by focusing on crucial issues such as: i) why and how undersampling is useful in the presence of class imbalance (i.e. frauds are a small percentage of the transactions), ii) how to deal with unbalanced and evolving data streams (non-stationarity due to fraud evolution and change of spending behavior), iii) how to assess performances in a way which is relevant for detection and iv) how to use feedbacks provided by investigators on the fraud alerts generated. Finally, we design and assess a prototype of a Fraud Detection System able to meet real-world working conditions and that is able to integrate investigators’ feedback to generate accurate alerts.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Billions of dollars of loss are caused every year by fraudulent credit card transactions. 
The design of efficient fraud detection algorithms is key for reducing these losses, and more and more algorithms rely on advanced machine learning techniques to assist fraud investigators. The design of fraud detection algorithms is however particularly challenging due to the non-stationary distribution of the data, the highly unbalanced classes distributions and the availability of few transactions labeled by fraud investigators. At the same time public data are scarcely available for confidentiality issues, leaving unanswered many questions about what is the best strategy. In this thesis we aim to provide some answers by focusing on crucial issues such as: i) why and how undersampling is useful in the presence of class imbalance (i.e. frauds are a small percentage of the transactions), ii) how to deal with unbalanced and evolving data streams (non-stationarity due to fraud evolution and change of spending behavior), iii) how to assess performances in a way which is relevant for detection and iv) how to use feedbacks provided by investigators on the fraud alerts generated. Finally, we design and assess a prototype of a Fraud Detection System able to meet real-world working conditions and that is able to integrate investigators’ feedback to generate accurate alerts. |
Lopes, Miguel Inference of gene networks from time series expression data and application to type 1 Diabetes PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/216729b, title = {Inference of gene networks from time series expression data and application to type 1 Diabetes}, author = {Miguel Lopes}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/216729/6/contratGasparLopes.pdf}, year = {2015}, date = {2015-01-01}, abstract = {The inference of gene regulatory networks (GRN) is of great importance to medical research, as causal mechanisms responsible for phenotypes are unravelled and potential therapeutical targets identified. In type 1 diabetes, insulin producing pancreatic beta-cells are the target of an auto-immune attack leading to apoptosis (cell suicide). Although key genes and regulations have been identified, a precise characterization of the process leading to beta-cell apoptosis has not been achieved yet. The inference of relevant molecular pathways in type 1 diabetes is then a crucial research topic. GRN inference from gene expression data (obtained from microarrays and RNA-seq technology) is a causal inference problem which may be tackled with well-established statistical and machine learning concepts. In particular, the use of time series facilitates the identification of the causal direction in cause-effect gene pairs. However, inference from gene expression data is a very challenging problem due to the large number of existing genes (in human, over twenty thousand) and the typical low number of samples in gene expression datasets. In this context, it is important to correctly assess the accuracy of network inference methods. The contributions of this thesis are on three distinct aspects. The first is on inference assessment using precision-recall curves, in particular using the area under the curve (AUPRC). 
The typical approach to assess AUPRC significance is using Monte Carlo, and a parametric alternative is proposed. It consists on deriving the mean and variance of the null AUPRC and then using these parameters to fit a beta distribution approximating the true distribution. The second contribution is an investigation on network inference from time series. Several state of the art strategies are experimentally assessed and novel heuristics are proposed. One is a fast approximation of first order Granger causality scores, suited for GRN inference in the large variable case. Another identifies co-regulated genes (ie. regulated by the same genes). Both are experimentally validated using microarray and simulated time series. The third contribution of this thesis is on the context of type 1 diabetes and is a study on beta cell gene expression after exposure to cytokines, emulating the mechanisms leading to apoptosis. 8 datasets of beta cell gene expression were used to identify differentially expressed genes before and after 24h, which were functionally characterized using bioinformatics tools. The two most differentially expressed genes, previously unknown in the type 1 Diabetes literature (RIPK2 and ELF3) were found to modulate cytokine induced apoptosis. A regulatory network was then inferred using a dynamic adaptation of a state of the art network inference method. Three out of four predicted regulations (involving RIPK2 and ELF3) were experimentally confirmed, providing a proof of concept for the adopted approach.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The inference of gene regulatory networks (GRN) is of great importance to medical research, as causal mechanisms responsible for phenotypes are unravelled and potential therapeutical targets identified. In type 1 diabetes, insulin producing pancreatic beta-cells are the target of an auto-immune attack leading to apoptosis (cell suicide). 
Although key genes and regulations have been identified, a precise characterization of the process leading to beta-cell apoptosis has not been achieved yet. The inference of relevant molecular pathways in type 1 diabetes is then a crucial research topic. GRN inference from gene expression data (obtained from microarrays and RNA-seq technology) is a causal inference problem which may be tackled with well-established statistical and machine learning concepts. In particular, the use of time series facilitates the identification of the causal direction in cause-effect gene pairs. However, inference from gene expression data is a very challenging problem due to the large number of existing genes (in human, over twenty thousand) and the typical low number of samples in gene expression datasets. In this context, it is important to correctly assess the accuracy of network inference methods. The contributions of this thesis are on three distinct aspects. The first is on inference assessment using precision-recall curves, in particular using the area under the curve (AUPRC). The typical approach to assess AUPRC significance is using Monte Carlo, and a parametric alternative is proposed. It consists on deriving the mean and variance of the null AUPRC and then using these parameters to fit a beta distribution approximating the true distribution. The second contribution is an investigation on network inference from time series. Several state of the art strategies are experimentally assessed and novel heuristics are proposed. One is a fast approximation of first order Granger causality scores, suited for GRN inference in the large variable case. Another identifies co-regulated genes (ie. regulated by the same genes). Both are experimentally validated using microarray and simulated time series. 
The third contribution of this thesis is on the context of type 1 diabetes and is a study on beta cell gene expression after exposure to cytokines, emulating the mechanisms leading to apoptosis. 8 datasets of beta cell gene expression were used to identify differentially expressed genes before and after 24h, which were functionally characterized using bioinformatics tools. The two most differentially expressed genes, previously unknown in the type 1 Diabetes literature (RIPK2 and ELF3) were found to modulate cytokine induced apoptosis. A regulatory network was then inferred using a dynamic adaptation of a state of the art network inference method. Three out of four predicted regulations (involving RIPK2 and ELF3) were experimentally confirmed, providing a proof of concept for the adopted approach. |
Lerman, Liran A machine learning approach for automatic and generic side-channel attacks PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/209070, title = {A machine learning approach for automatic and generic side-channel attacks}, author = {Liran Lerman}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/209070/2/be487c5b-7b94-414c-bf2e-96847aa98284.txt}, year = {2015}, date = {2015-01-01}, abstract = {L'omniprésence de dispositifs interconnectés amène à un intérêt massif pour la sécurité informatique fournie entre autres par le domaine de la cryptographie. Pendant des décennies, les spécialistes en cryptographie estimaient le niveau de sécurité d'un algorithme cryptographique indépendamment de son implantation dans un dispositif. Cependant, depuis la publication des attaques d'implantation en 1996, les attaques physiques sont devenues un domaine de recherche actif en considérant les propriétés physiques de dispositifs cryptographiques. Dans notre dissertation, nous nous concentrons sur les attaques profilées. Traditionnellement, les attaques profilées appliquent des méthodes paramétriques dans lesquelles une information a priori sur les propriétés physiques est supposée. Le domaine de l'apprentissage automatique produit des modèles automatiques et génériques ne nécessitant pas une information a priori sur le phénomène étudié. Cette dissertation apporte un éclairage nouveau sur les capacités des méthodes d'apprentissage automatique. Nous démontrons d'abord que les attaques profilées paramétriques surpassent les méthodes d'apprentissage automatique lorsqu'il n'y a pas d'erreur d'estimation ni d'hypothèse. En revanche, les attaques fondées sur l'apprentissage automatique sont avantageuses dans des scénarios réalistes où le nombre de données lors de l'étape d'apprentissage est faible. 
Par la suite, nous proposons une nouvelle métrique formelle d'évaluation qui permet (1) de comparer des attaques paramétriques et non-paramétriques et (2) d'interpréter les résultats de chaque méthode. La nouvelle mesure fournit les causes d'un taux de réussite élevé ou faible d'une attaque et, par conséquent, donne des pistes pour améliorer l'évaluation d'une implantation. Enfin, nous présentons des résultats expérimentaux sur des appareils non protégés et protégés. La première étude montre que l'apprentissage automatique a un taux de réussite plus élevé qu'une méthode paramétrique lorsque seules quelques données sont disponibles. La deuxième expérience démontre qu'un dispositif protégé est attaquable avec une approche appartenant à l'apprentissage automatique. La stratégie basée sur l'apprentissage automatique nécessite le même nombre de données lors de la phase d'apprentissage que lorsque celle-ci attaque un produit non protégé. Nous montrons également que des méthodes paramétriques surestiment ou sous-estiment le niveau de sécurité fourni par l'appareil alors que l'approche basée sur l'apprentissage automatique améliore cette estimation. En résumé, notre thèse est que les attaques basées sur l'apprentissage automatique sont avantageuses par rapport aux techniques classiques lorsque la quantité d'information a priori sur l'appareil cible et le nombre de données lors de la phase d'apprentissage sont faibles.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } L'omniprésence de dispositifs interconnectés amène à un intérêt massif pour la sécurité informatique fournie entre autres par le domaine de la cryptographie. Pendant des décennies, les spécialistes en cryptographie estimaient le niveau de sécurité d'un algorithme cryptographique indépendamment de son implantation dans un dispositif. Cependant, depuis la publication des attaques d'implantation en 1996, les attaques physiques sont devenues un domaine de recherche actif en considérant les propriétés physiques de dispositifs cryptographiques. 
Dans notre dissertation, nous nous concentrons sur les attaques profilées. Traditionnellement, les attaques profilées appliquent des méthodes paramétriques dans lesquelles une information a priori sur les propriétés physiques est supposée. Le domaine de l'apprentissage automatique produit des modèles automatiques et génériques ne nécessitant pas une information a priori sur le phénomène étudié. Cette dissertation apporte un éclairage nouveau sur les capacités des méthodes d'apprentissage automatique. Nous démontrons d'abord que les attaques profilées paramétriques surpassent les méthodes d'apprentissage automatique lorsqu'il n'y a pas d'erreur d'estimation ni d'hypothèse. En revanche, les attaques fondées sur l'apprentissage automatique sont avantageuses dans des scénarios réalistes où le nombre de données lors de l'étape d'apprentissage est faible. Par la suite, nous proposons une nouvelle métrique formelle d'évaluation qui permet (1) de comparer des attaques paramétriques et non-paramétriques et (2) d'interpréter les résultats de chaque méthode. La nouvelle mesure fournit les causes d'un taux de réussite élevé ou faible d'une attaque et, par conséquent, donne des pistes pour améliorer l'évaluation d'une implantation. Enfin, nous présentons des résultats expérimentaux sur des appareils non protégés et protégés. La première étude montre que l'apprentissage automatique a un taux de réussite plus élevé qu'une méthode paramétrique lorsque seules quelques données sont disponibles. La deuxième expérience démontre qu'un dispositif protégé est attaquable avec une approche appartenant à l'apprentissage automatique. La stratégie basée sur l'apprentissage automatique nécessite le même nombre de données lors de la phase d'apprentissage que lorsque celle-ci attaque un produit non protégé. 
Nous montrons également que des méthodes paramétriques surestiment ou sous-estiment le niveau de sécurité fourni par l'appareil alors que l'approche basée sur l'apprentissage automatique améliore cette estimation. En résumé, notre thèse est que les attaques basées sur l'apprentissage automatique sont avantageuses par rapport aux techniques classiques lorsque la quantité d'information a priori sur l'appareil cible et le nombre de données lors de la phase d'apprentissage sont faibles. |
Hajingabo, Leon Analyzing molecular network perturbations in human cancer: application to mutated genes and gene fusions involved in acute lymphoblastic leukemia PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/209126b, title = {Analyzing molecular network perturbations in human cancer: application to mutated genes and gene fusions involved in acute lymphoblastic leukemia}, author = {Leon Hajingabo}, url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/209126/3/d6d6225e-dfa8-46a3-985a-2ef19c71eff1.txt}, year = {2015}, date = {2015-01-01}, abstract = {Le séquençage du génome humain et l'émergence de nouvelles technologies de génomique à haut débit, ont initié de nouveaux modèles d'investigation pour l'analyse systématique des maladies humaines. Actuellement, nous pouvons tenter de comprendre les maladies tel que le cancer avec une perspective plus globale, en identifiant des gènes responsables des cancers et en étudiant la manière dont leurs produits protéiques fonctionnent dans un réseau d’interactions moléculaires. Dans ce contexte, nous avons collecté les gènes spécifiquement liés à la Leucémie Lymphoblastique Aiguë (LLA), et identifié de nouveaux partenaires d'interaction qui relient ces gènes clés associés à la LLA tels que NOTCH1, FBW7, KRAS et PTPN11, dans un réseau d’interactions. Nous avons également tenté de prédire l’impact fonctionnel des variations génomiques tel que des fusions de gènes impliquées dans LLA. En utilisant comme modèles trois différentes translocations chromosomiques ETV6-RUNX1 (TEL-AML1), BCR-ABL1, et E2A-PBX1 (TCF3-PBX1) fréquemment identifiées dans des cellules B LLA, nous avons adapté une approche de prédiction d’oncogènes afin de prédire des perturbations moléculaires dans la LLA. Nous avons montré que les circuits transcriptomiques dépendant de Myc et JunD sont spécifiquement dérégulés suite aux fusions de gènes TEL-AML1 et TCF3-PBX1, respectivement. Nous avons également identifié le mécanisme de transport des ARNm dépendant du facteur NXF1 comme une cible directe de la protéine de fusion TCF3-PBX1. 
Grâce à cette approche combinant les données interactomiques et les analyses d'expression génique, nous avons fourni un nouvel aperçu à la compréhension moléculaire de la Leucémie Lymphoblastique Aiguë.}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Le séquençage du génome humain et l'émergence de nouvelles technologies de génomique à haut débit, ont initié de nouveaux modèles d'investigation pour l'analyse systématique des maladies humaines. Actuellement, nous pouvons tenter de comprendre les maladies tel que le cancer avec une perspective plus globale, en identifiant des gènes responsables des cancers et en étudiant la manière dont leurs produits protéiques fonctionnent dans un réseau d’interactions moléculaires. Dans ce contexte, nous avons collecté les gènes spécifiquement liés à la Leucémie Lymphoblastique Aiguë (LLA), et identifié de nouveaux partenaires d'interaction qui relient ces gènes clés associés à la LLA tels que NOTCH1, FBW7, KRAS et PTPN11, dans un réseau d’interactions. Nous avons également tenté de prédire l’impact fonctionnel des variations génomiques tel que des fusions de gènes impliquées dans LLA. En utilisant comme modèles trois différentes translocations chromosomiques ETV6-RUNX1 (TEL-AML1), BCR-ABL1, et E2A-PBX1 (TCF3-PBX1) fréquemment identifiées dans des cellules B LLA, nous avons adapté une approche de prédiction d’oncogènes afin de prédire des perturbations moléculaires dans la LLA. Nous avons montré que les circuits transcriptomiques dépendant de Myc et JunD sont spécifiquement dérégulés suite aux fusions de gènes TEL-AML1 et TCF3-PBX1, respectivement. Nous avons également identifié le mécanisme de transport des ARNm dépendant du facteur NXF1 comme une cible directe de la protéine de fusion TCF3-PBX1. 
Grâce à cette approche combinant les données interactomiques et les analyses d'expression génique, nous avons fourni un nouvel aperçu à la compréhension moléculaire de la Leucémie Lymphoblastique Aiguë. |
Lopes, Miguel Inference of gene networks from time series expression data and application to type 1 Diabetes PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/216729c, title = {Inference of gene networks from time series expression data and application to type 1 Diabetes}, author = {Miguel Lopes}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/216729}, year = {2015}, date = {2015-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |
Dal Pozzolo, Andrea Adaptive Machine Learning for Credit Card Fraud Detection PhD Thesis 2015, (Funder: Universite Libre de Bruxelles). @phdthesis{info:hdl:2013/221654c, title = {Adaptive Machine Learning for Credit Card Fraud Detection}, author = {Dal Pozzolo, Andrea}, url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/221654}, year = {2015}, date = {2015-01-01}, note = {Funder: Universite Libre de Bruxelles}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } |