Nachtegael, Charlotte
Active learning for biomedical relation extraction, the oligogenic use case PhD Thesis
2024.
@phdthesis{nokey,
  author     = {Nachtegael, Charlotte},
  title      = {Active learning for biomedical relation extraction, the oligogenic use case},
  school     = {Université Libre de Bruxelles},
  year       = {2024},
  date       = {2024-06-28},
  url        = {https://difusion.ulb.ac.be/vufind/Record/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/375304/Holdings},
  abstract   = {In a context where technological advancements have enabled increased availability of genetic data through high-throughput sequencing technologies, the complexity of genetic diseases has become increasingly apparent. Oligogenic diseases, characterised by a combination of genetic variants in two or more genes, have emerged as a crucial research area, challenging the traditional model of "one genotype, one phenotype". Thus, understanding the underlying mechanisms and genetic interactions of oligogenic diseases has become a major priority in biomedical research. This context underlines the importance of developing dedicated tools to study these complex diseases. Our first major contribution, OLIDA, is an innovative database designed to collect data on variant combinations responsible for these diseases, filling significant gaps in the current knowledge, focused up until now on the digenic diseases. This resource, accessible via a web platform, adheres to FAIR principles and represents a significant advancement over its predecessor, DIDA, in terms of data curation and quality assessment. Furthermore, to support the biocuration of oligogenic diseases, we used active learning to construct DUVEL, a biomedical corpus focused on digenic variant combinations. To achieve this, we first investigated how to optimise these methods across numerous biomedical relation extraction datasets and developed a web-based platform, ALAMBIC, for text annotation using active learning. Our results and the quality of the corpus obtained demonstrate the effectiveness of active learning methods in biomedical relation annotation tasks. By establishing a curation pipeline for oligogenic diseases, as well as standards for integrating active learning methods into biocuration, our work represents a significant advancement in the field of biomedical natural language processing and the understanding of oligogenic diseases.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
Versbraegen, Nassim
Discovering multivariant pathogenic patterns among patients with rare diseases PhD Thesis
2024.
% Key derived from the repository record id in `url`, following the
% info:hdl:2013/<id> scheme used elsewhere in this file; the placeholder key was not unique.
@phdthesis{info:hdl:2013/375378,
  author     = {Versbraegen, Nassim},
  title      = {Discovering multivariant pathogenic patterns among patients with rare diseases},
  school     = {Université Libre de Bruxelles},
  year       = {2024},
  date       = {2024-06-24},
  url        = {https://difusion.ulb.ac.be/vufind/Record/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/375378/Holdings},
  abstract   = {Increasing evidence points to the complex interplay of multiple genetic variants as a major contributing factor in many human diseases. Oligogenic diseases, in which a small set of genes collaborate to cause a pathology, present a compelling example of this phenomenon and necessitate a shift away from traditional single-gene inheritance models. Our work aimed to develop robust methods for pinpointing pathogenic combinations of genetic variants across patient cohorts, ultimately improving disease understanding and potentially guiding future diagnostic approaches. We began by developing a novel machine learning framework that integrates explainable AI (XAI) techniques and game-theoretic concepts. This framework allows us to classify and characterise different types of oligogenic effects, providing insights into the specific mechanisms by which multiple genes interact to drive disease. Next, we focused on refining existing computational methods used to predict the pathogenicity of variant combinations. Our emphasis was two-fold: improving computational efficiency for handling the expansive datasets associated with cohort analysis, and critically, reducing false-positive rates to ensure the reliability of our results. With these tools in hand, we developed a specialised cohort analysis approach tailored to investigating diseases with complex genetic origins. To demonstrate the capabilities of our methodology, we delved into a Marfan syndrome cohort. Marfan syndrome is a hereditary condition affecting the body's connective tissue. Our analysis successfully uncovered potential modifier mutations that appear to interact with the primary disease-causing variant, offering new clues about the intricate genetic landscape of this condition.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
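Abels, Axel
Resolving Knowledge Limitations for Improved Collective Intelligence: A novel online machine learning approach PhD Thesis
2024.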
% Key derived from the repository record id in `url`, following the
% info:hdl:2013/<id> scheme used elsewhere in this file; the placeholder key was not unique.
@phdthesis{info:hdl:2013/373334,
  author     = {Abels, Axel},
  title      = {Resolving Knowledge Limitations for Improved Collective Intelligence: A novel online machine learning approach},
  school     = {Université Libre de Bruxelles},
  year       = {2024},
  date       = {2024-04-23},
  url        = {https://difusion.ulb.ac.be/vufind/Record/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/373334/Holdings},
  urldate    = {2024-04-23},
  abstract   = {One of the reasons human groups struggle to make the best decisions is that they are inherently biased in their beliefs. In essence, our perception of what is true is often distorted by individual and social biases, including stereotypes. When individuals deliberate about a decision, they tend to transmit these beliefs to others, thereby steering the entire group away from the best decision. For example, a senior doctor could spread a misinterpretation of symptoms to junior doctors, resulting in inappropriate treatments. The primary objective of this thesis is to mitigate the impact of such biases on group decision-making in domains such as medical diagnostics, policy-making, and crowdsourced fact-checking. We propose to achieve this by having humans interact through a collective decision-making platform in charge of handling the aggregation of group knowledge. The key hypothesis here is that by carefully managing the collectivization of knowledge through this platform, it will be substantially harder for humans to impose their biases on the final decision. The core of our work involves the development and analysis of algorithms for decision-making systems. These algorithms are designed to effectively aggregate diverse expertise while addressing biases. We thus focus on aggregation methods that use online learning to foster collective intelligence more effectively. In doing so, we take into account the nuances of individual expertise and the impact of biases, aiming to filter out noise and enhance the reliability of collective decisions. Our theoretical analysis of the proposed algorithms is complemented by rigorous testing in both simulated and online experimental environments to validate the system’s effectiveness. Our results demonstrate a significant improvement in performance and reduction in bias influence. These findings not only highlight the potential of technology-assisted decision-making but also underscore the value of addressing human biases in collaborative environments.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
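Verhelst, Theo
Causal and predictive modeling of customer churn - Lessons learned from empirical and theoretical research PhD Thesis
2024.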
% Key derived from the repository record id in `url`, following the
% info:hdl:2013/<id> scheme used elsewhere in this file; the placeholder key was not unique.
@phdthesis{info:hdl:2013/368384,
  author     = {Verhelst, Theo},
  title      = {Causal and predictive modeling of customer churn - Lessons learned from empirical and theoretical research},
  school     = {Université Libre de Bruxelles},
  year       = {2024},
  date       = {2024-01-29},
  url        = {https://difusion.ulb.ac.be/vufind/Record/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/368384/Holdings},
  urldate    = {2024-01-29},
  abstract   = {Customer churn is an important concern for large companies, especially in the
telecommunications sector. Customer retention campaigns are often used to mitigate
churn, but targeting the right customers based on their historical profiles
presents an important challenge. Companies usually have recourse to two data-driven
approaches: churn prediction and uplift modeling. In churn prediction,
customers are selected on the basis of their propensity to churn in the near future.
In uplift modeling, only customers who react positively to the campaign
are considered. Uplift modeling is used in various other domains, such as marketing,
healthcare, and finance. Despite the theoretical appeal of uplift modeling, its
added value with respect to conventional machine learning approaches has rarely
been quantified in the literature.
This doctoral thesis is the result of a collaborative research project between
the Machine Learning Group (ULB) and Orange Belgium, funded by Innoviris.
This collaboration offers a unique research opportunity to assess the added value
of causal-oriented strategies to address customer churn in the telecommunication
sector. Following the introduction, we give the necessary background in probability
theory, causality theory, and machine learning, and we describe the state of
the art in uplift modeling and counterfactual identification. Then, we present the
contributions of this thesis:
• An empirical comparison of various predictive and causal models for selecting
customers in churn prevention campaigns. We perform several benchmarks
of different state-of-the-art approaches on real-world datasets and in
live campaigns with our industrial partner, we propose a new approach that
exploits domain knowledge to improve predictions, and we make available
the first public churn dataset for uplift modeling, whose unique characteristics
make it more challenging than the few other public uplift datasets.
• Counterfactual identification allows one to classify the different behaviors
of customers in response to a marketing incentive. This can be used to establish
profiles of customers sensitive to the campaign, and subsequently
improve marketing operations. We derive novel bounds and point estimators
on the probability of counterfactual statements based on uplift models.
• A comprehensive comparison of predictive and uplift modeling, starting
from firm theoretical foundations and highlighting the parameters that influence
the performance of both approaches. In particular, we provide a new
formulation of the measure of profit, a formal proof of the convergence of
the uplift curve to the measure of profit, and an illustration, through simulations,
of the conditions under which predictive approaches still outperform
uplift modeling.
Our theoretical and empirical assessments of uplift modeling suggest that it often
fails to deliver the anticipated advantages over predictive modeling, especially in
scenarios such as customer churn within the telecom sector, characterized by class
imbalance, limited separability, and cost-benefit considerations. These results are
broadly aligned with the practical experience of our industrial partner and with
the existing scientific literature. Our counterfactual probability estimators allow
us to characterize customers at a level inaccessible to conventional predictive modeling,
revealing new insights on the behavior and preferences of customers.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
telecommunications sector. Customer retention campaigns are often used to mitigate
churn, but targeting the right customers based on their historical profiles
presents an important challenge. Companies usually have recourse to two data-driven
approaches: churn prediction and uplift modeling. In churn prediction,
customers are selected on the basis of their propensity to churn in the near future.
In uplift modeling, only customers who react positively to the campaign
are considered. Uplift modeling is used in various other domains, such as marketing,
healthcare, and finance. Despite the theoretical appeal of uplift modeling, its
added value with respect to conventional machine learning approaches has rarely
been quantified in the literature.
This doctoral thesis is the result of a collaborative research project between
the Machine Learning Group (ULB) and Orange Belgium, funded by Innoviris.
This collaboration offers a unique research opportunity to assess the added value
of causal-oriented strategies to address customer churn in the telecommunication
sector. Following the introduction, we give the necessary background in probability
theory, causality theory, and machine learning, and we describe the state of
the art in uplift modeling and counterfactual identification. Then, we present the
contributions of this thesis:
• An empirical comparison of various predictive and causal models for selecting
customers in churn prevention campaigns. We perform several benchmarks
of different state-of-the-art approaches on real-world datasets and in
live campaigns with our industrial partner, we propose a new approach that
exploits domain knowledge to improve predictions, and we make available
the first public churn dataset for uplift modeling, whose unique characteristics
make it more challenging than the few other public uplift datasets.
• Counterfactual identification allows one to classify the different behaviors
of customers in response to a marketing incentive. This can be used to establish
profiles of customers sensitive to the campaign, and subsequently
improve marketing operations. We derive novel bounds and point estimators
on the probability of counterfactual statements based on uplift models.
• A comprehensive comparison of predictive and uplift modeling, starting
from firm theoretical foundations and highlighting the parameters that influence
the performance of both approaches. In particular, we provide a new
formulation of the measure of profit, a formal proof of the convergence of
the uplift curve to the measure of profit, and an illustration, through simulations,
of the conditions under which predictive approaches still outperform
uplift modeling.
Our theoretical and empirical assessments of uplift modeling suggest that it often
fails to deliver the anticipated advantages over predictive modeling, especially in
scenarios such as customer churn within the telecom sector, characterized by class
imbalance, limited separability, and cost-benefit considerations. These results are
broadly aligned with the practical experience of our industrial partner and with
the existing scientific literature. Our counterfactual probability estimators allow
us to characterize customers at a level inaccessible to conventional predictive modeling,
revealing new insights on the behavior and preferences of customers.
De Stefani, Jacopo
Towards multivariate multi-step-ahead time series forecasting: A machine learning perspective PhD Thesis
2022, (Funder: Université Libre de Bruxelles).
@phdthesis{info:hdl:2013/340052,
  author     = {De Stefani, Jacopo},
  title      = {Towards multivariate multi-step-ahead time series forecasting: A machine learning perspective},
  school     = {Université Libre de Bruxelles},
  year       = {2022},
  date       = {2022-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/340052},
  note       = {Funder: Université Libre de Bruxelles},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
Ciortan, Madalina
Unsupervised analysis of scRNA-seq data with machine learning models PhD Thesis
2022, (Funder: Université Libre de Bruxelles).
@phdthesis{info:hdl:2013/340848,
  author     = {Ciortan, Madalina},
  title      = {Unsupervised analysis of {scRNA-seq} data with machine learning models},
  school     = {Université Libre de Bruxelles},
  year       = {2022},
  date       = {2022-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/340848},
  note       = {Funder: Université Libre de Bruxelles},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
Ciortan, Madalina; Defrance, Matthieu
GNN-based embedding for clustering scRNA-seq data Journal Article
In: Bioinformatics, vol. 38, no. 4, pp. 1037-1044, 2022, (DOI: 10.1093/bioinformatics/btab787).
@article{info:hdl:2013/343811,
  author     = {Ciortan, Madalina and Defrance, Matthieu},
  title      = {{GNN}-based embedding for clustering {scRNA-seq} data},
  journal    = {Bioinformatics},
  volume     = {38},
  number     = {4},
  pages      = {1037--1044},
  year       = {2022},
  date       = {2022-01-01},
  doi        = {10.1093/bioinformatics/btab787},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/343811},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Han, The Anh T. A. H.; Lenaerts, Tom; Santos, Francisco C.; Pereira, Luís Moniz
Voluntary safety commitments provide an escape from over-regulation in AI development Journal Article
In: Technology in society, vol. 68, 2022, (DOI: 10.1016/j.techsoc.2021.101843).
@article{info:hdl:2013/339040,
  author     = {Han, The Anh T. A. H. and Lenaerts, Tom and Santos, Francisco C. and Pereira, Luís Moniz},
  title      = {Voluntary safety commitments provide an escape from over-regulation in {AI} development},
  journal    = {Technology in Society},
  volume     = {68},
  year       = {2022},
  date       = {2022-01-01},
  doi        = {10.1016/j.techsoc.2021.101843},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/339040},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Paldino, Gian Marco; De Caro, Fabrizio; De Stefani, Jacopo; Vaccaro, Alfredo A.; Villacci, Domenico D.; Bontempi, Gianluca
A Digital Twin Approach for Improving Estimation Accuracy in Dynamic Thermal Rating of Transmission Lines Journal Article
In: Energies, vol. 15, no. 6, 2022, (DOI: 10.3390/en15062254).
@article{info:hdl:2013/342471,
  author     = {Paldino, Gian Marco and De Caro, Fabrizio and De Stefani, Jacopo and Vaccaro, Alfredo A. and Villacci, Domenico D. and Bontempi, Gianluca},
  title      = {A Digital Twin Approach for Improving Estimation Accuracy in Dynamic Thermal Rating of Transmission Lines},
  journal    = {Energies},
  volume     = {15},
  number     = {6},
  year       = {2022},
  date       = {2022-01-01},
  doi        = {10.3390/en15062254},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/342471},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Nachtegael, Charlotte; Gravel, Barbara; Dillen, Arnau; Smits, Guillaume; Nowe, Ann; Papadimitriou, Sofia; Lenaerts, Tom
Scaling up oligogenic diseases research with OLIDA: The Oligogenic Diseases Database Journal Article
In: Database, vol. 2022, 2022, (DOI: 10.1093/database/baac023).
@article{info:hdl:2013/342417,
  author     = {Nachtegael, Charlotte and Gravel, Barbara and Dillen, Arnau and Smits, Guillaume and Nowe, Ann and Papadimitriou, Sofia and Lenaerts, Tom},
  title      = {Scaling up oligogenic diseases research with {OLIDA}: The {Oligogenic Diseases Database}},
  journal    = {Database},
  volume     = {2022},
  year       = {2022},
  date       = {2022-01-01},
  doi        = {10.1093/database/baac023},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/342417},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Montero-Porras, Eladio; Grujić, Jelena; Fernández Domingos, Elias; Lenaerts, Tom
Inferring strategies from observations in long iterated Prisoner’s dilemma experiments Journal Article
In: Scientific reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-11654-2).
@article{info:hdl:2013/344327,
  author     = {Montero-Porras, Eladio and Grujić, Jelena and Fernández Domingos, Elias and Lenaerts, Tom},
  title      = {Inferring strategies from observations in long iterated {Prisoner’s} dilemma experiments},
  journal    = {Scientific Reports},
  volume     = {12},
  number     = {1},
  year       = {2022},
  date       = {2022-01-01},
  doi        = {10.1038/s41598-022-11654-2},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/344327},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Cimpeanu, Theodor; Santos, Francisco C.; Pereira, Luís Moniz; Lenaerts, Tom; Han, The Anh T. A. H.
Artificial intelligence development races in heterogeneous settings Journal Article
In: Scientific reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-05729-3).
@article{info:hdl:2013/341515,
  author     = {Cimpeanu, Theodor and Santos, Francisco C. and Pereira, Luís Moniz and Lenaerts, Tom and Han, The Anh T. A. H.},
  title      = {Artificial intelligence development races in heterogeneous settings},
  journal    = {Scientific Reports},
  volume     = {12},
  number     = {1},
  year       = {2022},
  date       = {2022-01-01},
  doi        = {10.1038/s41598-022-05729-3},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/341515},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Marquis, Bastien; Jansen, Maarten
Information criteria bias correction for group selection Journal Article
In: Statistical papers, 2022, (Language of publication: fr).
@article{info:hdl:2013/335472,
  author     = {Marquis, Bastien and Jansen, Maarten},
  title      = {Information criteria bias correction for group selection},
  journal    = {Statistical Papers},
  year       = {2022},
  date       = {2022-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/335472},
  note       = {Language of publication: fr},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
Jansen, Maarten
Wavelets from a Statistical Perspective Book
CRC Press, 2022, (Language of publication: fr).
@book{info:hdl:2013/333285,
  author     = {Maarten Jansen},
  title      = {Wavelets from a Statistical Perspective},
  publisher  = {CRC Press},
  year       = {2022},
  date       = {2022-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/333285},
  note       = {Language of publication: fr},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {book},
}
Buroni, Giovanni
On-Board-Unit big data analytics: from data architecture to traffic forecasting PhD Thesis
2021, (Funder: Université Libre de Bruxelles).
@phdthesis{info:hdl:2013/334819,
  author     = {Buroni, Giovanni},
  title      = {On-Board-Unit big data analytics: from data architecture to traffic forecasting},
  school     = {Université Libre de Bruxelles},
  year       = {2021},
  date       = {2021-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/334819},
  note       = {Funder: Université Libre de Bruxelles},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis},
}
Han, The Anh T. A. H.; Lenaerts, Tom; Santos, Francisco C; Pereira, Luís Moniz
Voluntary safety commitments provide an escape from over-regulation in AI development Miscellaneous
2021, (Conference: International Conference on Complex Systems (25-29/10/2021: Lyon, France)).
@misc{info:hdl:2013/336169,
  author     = {Han, The Anh T. A. H. and Lenaerts, Tom and Santos, Francisco C. and Pereira, Luís Moniz},
  title      = {Voluntary safety commitments provide an escape from over-regulation in {AI} development},
  year       = {2021},
  date       = {2021-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/336169},
  note       = {Conference: International Conference on Complex Systems (25-29/10/2021: Lyon, France)},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc},
}
Han, The Anh T. A. H.; Pereira, Luis Moniz; Santos, Francisco C; Lenaerts, Tom
Time-scale Differences will Influence the Regulation Required in an Idealised AI Race Game Miscellaneous
2021, (Conference: International Joint Conference on Artificial Intelligence (IJCAI)(30: 19-26/8/2021: Montreal, Canada)).
@misc{info:hdl:2013/336167,
  author     = {Han, The Anh T. A. H. and Pereira, Luís Moniz and Santos, Francisco C. and Lenaerts, Tom},
  title      = {Time-scale Differences will Influence the Regulation Required in an Idealised {AI} Race Game},
  year       = {2021},
  date       = {2021-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/336167},
  note       = {Conference: International Joint Conference on Artificial Intelligence (IJCAI) (30: 19-26/8/2021: Montreal, Canada)},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc},
}
Han, The Anh T. A. H.; Pereira, Luis Moniz; Lenaerts, Tom; Santos, Francisco C
Mediating Artificial Intelligence Developments through Negative and Positive Incentives Miscellaneous
2021, (Conference: International Conference on Complex Systems(25-29/10/2021: Lyon, France)).
@misc{info:hdl:2013/336166,
  author     = {Han, The Anh T. A. H. and Pereira, Luís Moniz and Lenaerts, Tom and Santos, Francisco C.},
  title      = {Mediating Artificial Intelligence Developments through Negative and Positive Incentives},
  year       = {2021},
  date       = {2021-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/336166},
  note       = {Conference: International Conference on Complex Systems (25-29/10/2021: Lyon, France)},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc},
}
Cimpeanu, Theodor; Han, The Anh T. A. H.; Santos, Francisco C; Pereira, Luis Moniz; Lenaerts, Tom
Heterogeneous Interactions in Artificial Intelligence Development Races Miscellaneous
2021, (Conference: International Conference on Complex Systems (25-29/10/2021: Lyon, France)).
@misc{info:hdl:2013/336168,
  author     = {Cimpeanu, Theodor and Han, The Anh T. A. H. and Santos, Francisco C. and Pereira, Luís Moniz and Lenaerts, Tom},
  title      = {Heterogeneous Interactions in Artificial Intelligence Development Races},
  year       = {2021},
  date       = {2021-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/336168},
  note       = {Conference: International Conference on Complex Systems (25-29/10/2021: Lyon, France)},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc},
}
Fernández Domingos, Elias; Grujić, Jelena; Burguillo, Juan Carlos; Santos, Francisco C; Lenaerts, Tom
Modeling behavioral experiments on uncertainty and cooperation with population-based reinforcement learning Miscellaneous
2021, (Conference: Artificial Life Conference(19-23/7/2021: Prague, Czech Republic)).
@misc{info:hdl:2013/336173,
  author     = {Fernández Domingos, Elias and Grujić, Jelena and Burguillo, Juan Carlos and Santos, Francisco C. and Lenaerts, Tom},
  title      = {Modeling behavioral experiments on uncertainty and cooperation with population-based reinforcement learning},
  year       = {2021},
  date       = {2021-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/336173},
  note       = {Conference: Artificial Life Conference (19-23/7/2021: Prague, Czech Republic)},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc},
}