2023
|
Claeskens, G.; Jansen, Maarten Comments on: Statistical inference and large-scale multiple testing for high-dimensional regression models Journal Article In: TEST, vol. 32, no. 4, pp. 1177-1179, 2023, (DOI: 10.1007/s11749-023-00896-5). @article{info:hdl:2013/371479,
title = {Comments on: Statistical inference and large-scale multiple testing for high-dimensional regression models},
author = {G. Claeskens and Maarten Jansen},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/371479/3/ClaeskensJansen-TEST-noformat.pdf},
year = {2023},
journal = {TEST},
volume = {32},
number = {4},
pages = {1177--1179},
doi = {10.1007/s11749-023-00896-5},
pubstate = {published},
tppubtype = {article}
}
|
Lunghi, Daniele; Paldino, Gian Marco; Caelen, Olivier; Bontempi, Gianluca An Adversary Model of Fraudsters’ Behavior to Improve Oversampling in Credit Card Fraud Detection Journal Article In: IEEE Access, vol. 11, pp. 136666-136679, 2023, (DOI: 10.1109/ACCESS.2023.3337635). @article{info:hdl:2013/367638,
title = {An Adversary Model of Fraudsters’ Behavior to Improve Oversampling in Credit Card Fraud Detection},
author = {Daniele Lunghi and Gian Marco Paldino and Olivier Caelen and Gianluca Bontempi},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/367638/1/doi_351282.pdf},
year = {2023},
journal = {IEEE Access},
volume = {11},
pages = {136666--136679},
abstract = {Imbalanced learning jeopardizes the accuracy of traditional classification models, particularly for what concerns the minority class, which is often the class of interest. This paper addresses the issue of imbalanced learning in credit card fraud detection by introducing a novel approach that models fraudulent behavior as a time-dependent process. The main contribution is the design and assessment of an oversampling strategy, called 'Adversary-based Oversampling' (ADVO), which relies on modeling the temporal relationship among frauds. The strategy is implemented by two learning approaches: first, an innovative regression-based oversampling model that predicts subsequent fraudulent activities based on previous fraud features. Second, the adaptation of the state-of-the-art TimeGAN oversampling algorithm to the context of credit card fraud detection. This adaptation involves treating a sequence of frauds from the same card as a time series, from which artificial frauds' time series are generated. Experiments have been conducted using real credit card transaction data from our industrial partner, Worldline S.A, and a synthetic dataset generated by a transaction simulator for reproducibility purposes. Our findings show that an oversampling approach incorporating time-dependent modeling of frauds provides competitive results, measured against common fraud detection metrics, compared to traditional oversampling algorithms.},
doi = {10.1109/ACCESS.2023.3337635},
pubstate = {published},
tppubtype = {article}
}
Imbalanced learning jeopardizes the accuracy of traditional classification models, particularly for what concerns the minority class, which is often the class of interest. This paper addresses the issue of imbalanced learning in credit card fraud detection by introducing a novel approach that models fraudulent behavior as a time-dependent process. The main contribution is the design and assessment of an oversampling strategy, called 'Adversary-based Oversampling' (ADVO), which relies on modeling the temporal relationship among frauds. The strategy is implemented by two learning approaches: first, an innovative regression-based oversampling model that predicts subsequent fraudulent activities based on previous fraud features. Second, the adaptation of the state-of-the-art TimeGAN oversampling algorithm to the context of credit card fraud detection. This adaptation involves treating a sequence of frauds from the same card as a time series, from which artificial frauds' time series are generated. Experiments have been conducted using real credit card transaction data from our industrial partner, Worldline S.A, and a synthetic dataset generated by a transaction simulator for reproducibility purposes. Our findings show that an oversampling approach incorporating time-dependent modeling of frauds provides competitive results, measured against common fraud detection metrics, compared to traditional oversampling algorithms. |
Verhelst, Theo; Mercier, Denis; Shrestha, Jeevan; Bontempi, Gianluca Partial counterfactual identification and uplift modeling: theoretical results and real-world assessment Journal Article In: Machine Learning, 2023, (DOI: 10.1007/s10994-023-06317-w). @article{info:hdl:2013/367639,
title = {Partial counterfactual identification and uplift modeling: theoretical results and real-world assessment},
author = {Theo Verhelst and Denis Mercier and Jeevan Shrestha and Gianluca Bontempi},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/367639/3/MLJ.pdf},
year = {2023},
journal = {Machine Learning},
abstract = {Counterfactuals are central in causal human reasoning and the scientific discovery process. The uplift, also called conditional average treatment effect, measures the causal effect of some action, or treatment, on the outcome of an individual. This paper discusses how it is possible to derive bounds on the probability of counterfactual statements based on uplift terms. First, we derive some original bounds on the probability of counterfactuals and we show that tightness of such bounds depends on the information of the feature set on the uplift term. Then, we propose a point estimator based on the assumption of conditional independence between the counterfactual outcomes. The quality of the bounds and the point estimators are assessed on synthetic data and a large real-world customer data set provided by a telecom company, showing significant improvement over the state of the art.},
doi = {10.1007/s10994-023-06317-w},
pubstate = {published},
tppubtype = {article}
}
Counterfactuals are central in causal human reasoning and the scientific discovery process. The uplift, also called conditional average treatment effect, measures the causal effect of some action, or treatment, on the outcome of an individual. This paper discusses how it is possible to derive bounds on the probability of counterfactual statements based on uplift terms. First, we derive some original bounds on the probability of counterfactuals and we show that tightness of such bounds depends on the information of the feature set on the uplift term. Then, we propose a point estimator based on the assumption of conditional independence between the counterfactual outcomes. The quality of the bounds and the point estimators are assessed on synthetic data and a large real-world customer data set provided by a telecom company, showing significant improvement over the state of the art. |
Coelho, Léonardo Bertolucci; Morillo, Daniel Torres; Bernal, Miguel; Paldino, Gian Marco; Bontempi, Gianluca; Troyano, Jon Ustarroz Probing the randomness of the local current distributions of 316 L stainless steel corrosion in NaCl solution Journal Article In: Corrosion Science, vol. 217, pp. 111104, 2023, (DOI: 10.1016/j.corsci.2023.111104). @article{info:hdl:2013/358502,
title = {Probing the randomness of the local current distributions of 316 {L} stainless steel corrosion in {NaCl} solution},
author = {Léonardo Bertolucci Coelho and Daniel Torres Morillo and Miguel Bernal and Gian Marco Paldino and Gianluca Bontempi and Jon Ustarroz Troyano},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/358502/3/BertolucciCoelho2023_PostPrint.pdf},
year = {2023},
journal = {Corrosion Science},
volume = {217},
pages = {111104},
abstract = {This investigation proposes using Scanning Electrochemical Cell Microscopy (SECCM) as a high throughput tool to collect corrosion activity from randomly probed locations on 316 L SS. In the presence of chloride, potentiodynamic polarisation tests triggered the development of pitting corrosion. Data science methods were deployed to handle and explore 955 j Vs E curves. Normality tests and fitting with theoretical functions were used to understand the conditional log(j) distributions at different potentials. Unimodal and uniform distributions were assigned to the passive and pitting regions. Our “big-data” local strategy revealed a potential-dependent distribution of log(j), with the randomness increasing with testing aggressiveness. Data availability: All data generated or analysed during this study are included in this published article (and its supplementary information files) and are available in the Mendeley Data repository, [https://data.mendeley.com/datasets/78rz8vw46x/2]. Code availability: The code required to reproduce these findings is included in this published article (and its supplementary information files) and is available to download from GitHub: https://github.com/bcoelho-leonardo/Data-driven-analysis-of-the-local-current-distributions-of-316L-corrosion-in-NaCl-solution/blob/4efff485b115468840b25ea56ad81b31711c0f51/local%20current%20distributions%20of%20316L%20corrosion.ipynb.},
doi = {10.1016/j.corsci.2023.111104},
pubstate = {published},
tppubtype = {article}
}
This investigation proposes using Scanning Electrochemical Cell Microscopy (SECCM) as a high throughput tool to collect corrosion activity from randomly probed locations on 316 L SS. In the presence of chloride, potentiodynamic polarisation tests triggered the development of pitting corrosion. Data science methods were deployed to handle and explore 955 j Vs E curves. Normality tests and fitting with theoretical functions were used to understand the conditional log(j) distributions at different potentials. Unimodal and uniform distributions were assigned to the passive and pitting regions. Our “big-data” local strategy revealed a potential-dependent distribution of log(j), with the randomness increasing with testing aggressiveness. Data availability: All data generated or analysed during this study are included in this published article (and its supplementary information files) and are available in the Mendeley Data repository, [https://data.mendeley.com/datasets/78rz8vw46x/2]. Code availability: The code required to reproduce these findings is included in this published article (and its supplementary information files) and is available to download from GitHub: https://github.com/bcoelho-leonardo/Data-driven-analysis-of-the-local-current-distributions-of-316L-corrosion-in-NaCl-solution/blob/4efff485b115468840b25ea56ad81b31711c0f51/local%20current%20distributions%20of%20316L%20corrosion.ipynb. |
Salamanca, Eva Muñoz; Dave, Himanshu; D'Alessio, Giuseppe; Bontempi, Gianluca; Parente, Alessandro; Le Clainche, Soledad Extraction and analysis of flow features in planar synthetic jets using different machine learning techniques Journal Article In: Physics of Fluids, vol. 35, 2023, (DOI: https://doi.org/10.1063/5.0163833). @article{info:hdl:2013/363688,
title = {Extraction and analysis of flow features in planar synthetic jets using different machine learning techniques},
author = {Eva Muñoz Salamanca and Himanshu Dave and Giuseppe D'Alessio and Gianluca Bontempi and Alessandro Parente and Le Clainche, Soledad},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/363688/3/POF_dimensionality_reduction_comparison.pdf},
year = {2023},
journal = {Physics of Fluids},
volume = {35},
doi = {10.1063/5.0163833},
pubstate = {published},
tppubtype = {article}
}
|
Coelho, Léonardo Bertolucci; Morillo, Daniel Torres; Vangrunderbeek, Vincent; Bernal, Miguel; Paldino, Gian Marco; Bontempi, Gianluca; Troyano, Jon Ustarroz Estimating pitting descriptors of 316 L stainless steel by machine learning and statistical analysis Journal Article In: npj Materials Degradation, vol. 7, no. 1, 2023, (DOI: 10.1038/s41529-023-00403-z). @article{info:hdl:2013/367646,
title = {Estimating pitting descriptors of 316 {L} stainless steel by machine learning and statistical analysis},
author = {Léonardo Bertolucci Coelho and Daniel Torres Morillo and Vincent Vangrunderbeek and Miguel Bernal and Gian Marco Paldino and Gianluca Bontempi and Jon Ustarroz Troyano},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/367646/1/doi_351290.pdf},
year = {2023},
journal = {npj Materials Degradation},
volume = {7},
number = {1},
abstract = {A hybrid rule-based/ML approach using linear regression and artificial neural networks (ANNs) determined pitting corrosion descriptors from high-throughput data obtained with Scanning Electrochemical Cell Microscopy (SECCM) on 316 L stainless steel. Non-parametric density estimation determined the central tendencies of the $E_{\mathrm{pit}}/\log(j_{\mathrm{pit}})$ and $E_{\mathrm{pass}}/\log(j_{\mathrm{pass}})$ distributions. Descriptors estimated using conditional mean or median curves were compared to their central tendency values, with the conditional medians providing more accurate results. Due to their lower sensitivity to high outliers, the conditional medians were more robust representations of the $\log(j)$ vs. $E$ distributions. An observed trend of passive range shortening with increasing testing aggressiveness was attributed to delayed stabilisation of the passive film, rather than early passivity breakdown.},
doi = {10.1038/s41529-023-00403-z},
pubstate = {published},
tppubtype = {article}
}
A hybrid rule-based/ML approach using linear regression and artificial neural networks (ANNs) determined pitting corrosion descriptors from high-throughput data obtained with Scanning Electrochemical Cell Microscopy (SECCM) on 316 L stainless steel. Non-parametric density estimation determined the central tendencies of the $E_{\mathrm{pit}}/\log(j_{\mathrm{pit}})$ and $E_{\mathrm{pass}}/\log(j_{\mathrm{pass}})$ distributions. Descriptors estimated using conditional mean or median curves were compared to their central tendency values, with the conditional medians providing more accurate results. Due to their lower sensitivity to high outliers, the conditional medians were more robust representations of the $\log(j)$ vs. $E$ distributions. An observed trend of passive range shortening with increasing testing aggressiveness was attributed to delayed stabilisation of the passive film, rather than early passivity breakdown. |
2022
|
Nachtegael, Charlotte; Gravel, Barbara; Dillen, Arnau; Smits, Guillaume; Nowé, Ann; Papadimitriou, Sofia; Lenaerts, Tom Scaling up the oligogenic diseases research with OLIDA: the Oligogenic Diseases Database Miscellaneous 2022, (Conference: Genomics of Rare Disease 2022). @misc{info:hdl:2013/352609b,
title = {Scaling up the oligogenic diseases research with {OLIDA}: the {Oligogenic Diseases Database}},
author = {Charlotte Nachtegael and Barbara Gravel and Arnau Dillen and Guillaume Smits and Ann Nowé and Sofia Papadimitriou and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/352609/3/poster_GenomicsDiseases2022.pdf},
year = {2022},
abstract = {The study of genetic variation associated with disease has shown the inadequacy of the “one gene - one disease phenotype” paradigm for many cases, leading to the notion of a conceptual continuum starting from monogenic disorders to oligogenic and polygenic diseases. An important step towards understanding non-Mendelian disorders was the creation of the Digenic Diseases Database (DIDA), collecting curated scientific information on digenic variant combinations involved in digenic diseases. Different machine learning methods aiming to tackle the cause of digenic diseases have successfully used DIDA as a benchmark dataset and have been in turn used in scientific studies analysing novel oligogenic cases. While this marked a new age of predictive tools and underlined the importance of DIDA, these advances also demonstrated the need to expand further in the genetic disease continuum, beyond digenic diseases, in a continuous and more careful manner. Moreover, a structured re-evaluation of the inclusion of oligogenic combinations in such a database and their pathogenic link to diseases has become essential, in order to aid researchers in using high-quality and properly curated information when assessing their medical cases. We present OLIDA (https://olida.ibsquare.be/), the Oligogenic Diseases Database, which reinvents DIDA, containing newly and fully re-curated data and freely accessible information on oligogenic variant combinations, i.e. combinations of variants in multiple genes involved in an oligogenic disease, published in the scientific literature until February 2020. The database includes 916 oligogenic variant combinations, 192 of them involving more than two genes, linked to 159 genetic diseases. OLIDA provides, for the first time in the field, a structured protocol for the evaluation of the pathogenicity of each oligogenic combination, based on the genetic and functional evidence supporting it, paying special attention to their joint variant effect. 
The evidence is derived from a combination of the results presented in the scientific papers and information from knowledge databases, and is depicted with a confidence score. OLIDA further follows the FAIR principles on data management. To conclude, OLIDA is the first database containing oligogenic variant combinations and, for each, a confidence score of its pathogenic involvement in the associated disease. With this work, we are initiating the important discussion on how the evidence of pathogenicity related to oligogenic diseases should be reported and evaluated in the scientific literature, a concept that becomes increasingly important with the growing amount of data in the field.},
note = {Conference: Genomics of Rare Disease 2022},
pubstate = {published},
tppubtype = {misc}
}
The study of genetic variation associated with disease has shown the inadequacy of the “one gene - one disease phenotype” paradigm for many cases, leading to the notion of a conceptual continuum starting from monogenic disorders to oligogenic and polygenic diseases. An important step towards understanding non-Mendelian disorders was the creation of the Digenic Diseases Database (DIDA), collecting curated scientific information on digenic variant combinations involved in digenic diseases. Different machine learning methods aiming to tackle the cause of digenic diseases have successfully used DIDA as a benchmark dataset and have been in turn used in scientific studies analysing novel oligogenic cases. While this marked a new age of predictive tools and underlined the importance of DIDA, these advances also demonstrated the need to expand further in the genetic disease continuum, beyond digenic diseases, in a continuous and more careful manner. Moreover, a structured re-evaluation of the inclusion of oligogenic combinations in such a database and their pathogenic link to diseases has become essential, in order to aid researchers in using high-quality and properly curated information when assessing their medical cases. We present OLIDA (https://olida.ibsquare.be/), the Oligogenic Diseases Database, which reinvents DIDA, containing newly and fully re-curated data and freely accessible information on oligogenic variant combinations, i.e. combinations of variants in multiple genes involved in an oligogenic disease, published in the scientific literature until February 2020. The database includes 916 oligogenic variant combinations, 192 of them involving more than two genes, linked to 159 genetic diseases. OLIDA provides, for the first time in the field, a structured protocol for the evaluation of the pathogenicity of each oligogenic combination, based on the genetic and functional evidence supporting it, paying special attention to their joint variant effect. 
The evidence is derived from a combination of the results presented in the scientific papers and information from knowledge databases, and is depicted with a confidence score. OLIDA further follows the FAIR principles on data management. To conclude, OLIDA is the first database containing oligogenic variant combinations and, for each, a confidence score of its pathogenic involvement in the associated disease. With this work, we are initiating the important discussion on how the evidence of pathogenicity related to oligogenic diseases should be reported and evaluated in the scientific literature, a concept that becomes increasingly important with the growing amount of data in the field. |
Terrucha, Ines; Domingos, Elias Fernandez; Santos, Francisco C; Simoens, Pieter; Lenaerts, Tom The art of compensation: how hybrid teams solve collective risk dilemmas Miscellaneous 2022, (Conference: Adaptive and Learning Agents (ALA) Workshop (9-10/5/2022: Auckland, NZ)). @misc{info:hdl:2013/366661,
title = {The art of compensation: how hybrid teams solve collective risk dilemmas},
author = {Ines Terrucha and Elias Fernandez Domingos and Francisco C Santos and Pieter Simoens and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366661/3/8090.pdf},
year = {2022},
note = {Conference: Adaptive and Learning Agents (ALA) Workshop (9-10/5/2022: Auckland, NZ)},
pubstate = {published},
tppubtype = {misc}
}
|
Montero-Porras, Eladio; Grujić, Jelena; Domingos, Elias Fernandez; Lenaerts, Tom Inferring Strategies from Observations in Long Iterated Prisoner’s Dilemma Experiments Miscellaneous 2022, (Conference: International Conference on Social Dilemmas (19-22/07/2022: Copenhagen, Denmark)). @misc{info:hdl:2013/366679b,
title = {Inferring Strategies from Observations in Long Iterated {Prisoner’s Dilemma} Experiments},
author = {Eladio Montero-Porras and Jelena Grujić and Elias Fernandez Domingos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366679/3/Inferring_Strategies.pdf},
year = {2022},
note = {Conference: International Conference on Social Dilemmas (19-22/07/2022: Copenhagen, Denmark)},
pubstate = {published},
tppubtype = {misc}
}
|
Versbraegen, Nassim; Gravel, Barbara; Nachtegael, Charlotte; Renaux, Alexandre; Verkinderen, Emma; Nowé, Ann; Lenaerts, Tom; Papadimitriou, Sofia Taking the prediction of pathogenic variant-combinations to the next level with VarCoPP2.0 Miscellaneous 2022, (Conference: European Conference on Computational Biology (21: 12-21 September 2022: Sitges, Barcelona)). @misc{info:hdl:2013/352566b,
title = {Taking the prediction of pathogenic variant-combinations to the next level with {VarCoPP2.0}},
author = {Nassim Versbraegen and Barbara Gravel and Charlotte Nachtegael and Alexandre Renaux and Emma Verkinderen and Ann Nowé and Tom Lenaerts and Sofia Papadimitriou},
url = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/352566},
year = {2022},
note = {Conference: European Conference on Computational Biology (21: 12-21 September 2022: Sitges, Barcelona)},
pubstate = {published},
tppubtype = {misc}
}
|
Montero-Porras, Eladio; Grujić, Jelena; Domingos, Elias Fernandez; Lenaerts, Tom Inferring Strategies from Observations in Long Iterated Prisoner’s Dilemma Experiments Miscellaneous 2022, (Conference: Complex Systems Conference 2022 (17-21/10/2022: Palma de Mallorca, Spain)). @misc{info:hdl:2013/366678b,
title = {Inferring Strategies from Observations in Long Iterated {Prisoner’s Dilemma} Experiments},
author = {Eladio Montero-Porras and Jelena Grujić and Elias Fernandez Domingos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366678/3/Inferring_Strategies.pdf},
year = {2022},
note = {Conference: Complex Systems Conference 2022 (17-21/10/2022: Palma de Mallorca, Spain)},
pubstate = {published},
tppubtype = {misc}
}
|
Abels, Axel; Lenaerts, Tom; Trianni, Vito; Nowé, Ann A Novel Approach to Handle Non-stationarity in Collective Decision-Making with Experts Miscellaneous 2022, (Conference: ACM Collective Intelligence Conference 2022 (20-21 October 2022: Online)). @misc{info:hdl:2013/352851b,
title = {A Novel Approach to Handle Non-stationarity in Collective Decision-Making with Experts},
author = {Axel Abels and Tom Lenaerts and Vito Trianni and Ann Nowé},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/352851/3/6ea4f4cccc1241fa8855f17e91c8ee51.pdf},
year = {2022},
note = {Conference: ACM Collective Intelligence Conference 2022 (20-21 October 2022: Online)},
pubstate = {published},
tppubtype = {misc}
}
|
Abels, Axel; Domingos, Elias Fernandez; Lenaerts, Tom; Trianni, Vito; Nowé, Ann Bias Mitigation in Decision-Making with Expert Advice Miscellaneous 2022, (Conference: Benelux AI Conference (BNAIC) and Benelux machine learning conference (Benelearn) (7-9/11/2022: Antwerpen, Belgique)). @misc{info:hdl:2013/366668b,
title = {Bias Mitigation in Decision-Making with Expert Advice},
author = {Axel Abels and Elias Fernandez Domingos and Tom Lenaerts and Vito Trianni and Ann Nowé},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366668/3/BNAIC2022-BiasMitigation.pdf},
year = {2022},
note = {Conference: Benelux AI Conference (BNAIC) and Benelux machine learning conference (Benelearn) (7-9/11/2022: Antwerpen, Belgique)},
pubstate = {published},
tppubtype = {misc}
}
|
Papadimitriou, Sofia; Gravel, Barbara; Nachtegael, Charlotte; De Baere, Elfride; Loeys, Bart; Vikkula, Miikka; Smits, Guillaume; Lenaerts, Tom The importance of good data quality and proper pathogenicity reporting in the medical genetics field: the case of oligogenic diseases Miscellaneous 2022, (Conference: Rare Med Symposium (8-12-2022: Gent)). @misc{info:hdl:2013/366742,
title = {The importance of good data quality and proper pathogenicity reporting in the medical genetics field: the case of oligogenic diseases},
author = {Sofia Papadimitriou and Barbara Gravel and Charlotte Nachtegael and De Baere, Elfride and Bart Loeys and Miikka Vikkula and Guillaume Smits and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366742/3/Abstract_GRD.pdf},
year = {2022},
abstract = {Background/Aims: Reports of oligogenic cases (i.e. individuals whose disease phenotype can only be explained by the co-occurrence of multiple variants in several genes) have been rapidly increasing, in an effort to close the gap of missing genetic diagnoses. Nevertheless, the quality of this data had never been properly assessed, especially as standards and guidelines for such cases are currently missing. This work aimed to collect all reported oligogenic cases in one database, OLIDA, assess the quality of the reported information and provide, for the first time, recommendations for their proper reporting. Methods: 318 research articles reporting oligogenic cases were extracted from PubMed. Independent curators collected the relevant oligogenic information (i) from the articles and (ii) from public relevant databases. With this data, a transparent curation protocol was developed assigning a confidence score to each oligogenic case based on the amount of pathogenic evidence at the genetic and functional level. The collection and assessment of this data led to the creation of OLIDA, the Oligogenic Diseases Database. Results: OLIDA contains information on oligogenic cases linked to 177 different genetic diseases. Each instance is linked with a confidence score depicting the quality of the associated genetic and functional pathogenic evidence. The data revealed that the majority of papers do not provide proper genetic evidence excluding a monogenic model, while this evidence is rarely coupled with functional experiments for confirmation. Our recommendations stress the necessity of fulfilling both conditions. The use of multiple extended pedigrees showing a clear segregation of the reported variants, control cohorts of a suitable size, as well as functional experiments showing the synergistic effect of the involved variants are essential for this purpose. 
Conclusion: With our work we reveal the recurrent issues on the reporting of oligogenic cases and stress the need for the development of standards in the field. As the number of papers identifying oligogenic causes to disease is increasing rapidly, initiating this discussion is imperative.},
note = {Conference: Rare Med Symposium (8-12-2022: Gent)},
pubstate = {published},
tppubtype = {misc}
}
Background/Aims: Reports of oligogenic cases (i.e. individuals whose disease phenotype can only be explained by the co-occurrence of multiple variants in several genes) have been rapidly increasing, in an effort to close the gap of missing genetic diagnoses. Nevertheless, the quality of this data had never been properly assessed, especially as standards and guidelines for such cases are currently missing. This work aimed to collect all reported oligogenic cases in one database, OLIDA, assess the quality of the reported information and provide, for the first time, recommendations for their proper reporting. Methods: 318 research articles reporting oligogenic cases were extracted from PubMed. Independent curators collected the relevant oligogenic information (i) from the articles and (ii) from public relevant databases. With this data, a transparent curation protocol was developed assigning a confidence score to each oligogenic case based on the amount of pathogenic evidence at the genetic and functional level. The collection and assessment of this data led to the creation of OLIDA, the Oligogenic Diseases Database. Results: OLIDA contains information on oligogenic cases linked to 177 different genetic diseases. Each instance is linked with a confidence score depicting the quality of the associated genetic and functional pathogenic evidence. The data revealed that the majority of papers do not provide proper genetic evidence excluding a monogenic model, while this evidence is rarely coupled with functional experiments for confirmation. Our recommendations stress the necessity of fulfilling both conditions. The use of multiple extended pedigrees showing a clear segregation of the reported variants, control cohorts of a suitable size, as well as functional experiments showing the synergistic effect of the involved variants are essential for this purpose. 
Conclusion: With our work we reveal the recurrent issues on the reporting of oligogenic cases and stress the need for the development of standards in the field. As the number of papers identifying oligogenic causes to disease is increasing rapidly, initiating this discussion is imperative. |
Piron, Anthony; Szymczak, Florian; Alvelos, Maria De Oliveira; Defrance, Matthieu; Lenaerts, Tom; Eizirik, Decio L.; Cnop, Miriam RedRibbon: A new rank-rank hypergeometric overlap pipeline to compare gene and transcript expression signatures Journal Article In: bioRxiv, 2022, (DOI: https://doi.org/10.1101/2022.08.31.505818). @article{info:hdl:2013/353212d,
title = {{RedRibbon}: A new rank-rank hypergeometric overlap pipeline to compare gene and transcript expression signatures},
author = {Anthony Piron and Florian Szymczak and Maria De Oliveira Alvelos and Matthieu Defrance and Tom Lenaerts and Decio L. Eizirik and Miriam Cnop},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353212/3/2022.08.31.505818v1.full.pdf},
year = {2022},
journal = {bioRxiv},
abstract = {Motivation. High throughput omics technologies have generated a wealth of large protein, gene and transcript datasets that have exacerbated the need for new methods to analyse and compare big datasets. Rank-rank hypergeometric overlap is an important threshold-free method to combine and visualize two ranked lists of P-values or fold-changes, usually from differential gene expression analyses. Here, we introduce a new rank-rank hypergeometric overlap-based method aimed at both gene level and alternative splicing analyses at transcript or exon level, hitherto unreachable as transcript numbers are an order of magnitude larger than gene numbers. Results. We tested the tool on synthetic and real datasets at gene and transcript levels to detect correlation and anti-correlation patterns and found it to be fast and accurate, even on very large datasets thanks to an evolutionary algorithm based minimal P-value search. The tool comes with a ready-to-use permutation scheme allowing the computation of adjusted P-values at low time cost. Additionally, the package is a drop-in replacement to previous packages as a compatibility mode is included, allowing to re-run older studies with close to no change to existing pipelines. RedRibbon holds the promise to accurately extricate detailed information from large analyses. Availability. RNA-sequencing datasets are available through the Gene Expression Omnibus (GEO) portal with accession numbers GSE159984, GSE133218, GSE137136, GSE98485, GSE148058 and GSE108413. The C libraries and R package code are open to the community with a permissive licence (GPL3) and available for download from GitHub https://github.com/antpiron/ale, https://github.com/antpiron/cRedRibbon and https://github.com/antpiron/RedRibbon.},
doi = {10.1101/2022.08.31.505818},
pubstate = {published},
tppubtype = {article}
}
Motivation. High throughput omics technologies have generated a wealth of large protein, gene and transcript datasets that have exacerbated the need for new methods to analyse and compare big datasets. Rank-rank hypergeometric overlap is an important threshold-free method to combine and visualize two ranked lists of P-values or fold-changes, usually from differential gene expression analyses. Here, we introduce a new rank-rank hypergeometric overlap-based method aimed at both gene level and alternative splicing analyses at transcript or exon level, hitherto unreachable as transcript numbers are an order of magnitude larger than gene numbers. Results. We tested the tool on synthetic and real datasets at gene and transcript levels to detect correlation and anti-correlation patterns and found it to be fast and accurate, even on very large datasets thanks to an evolutionary algorithm based minimal P-value search. The tool comes with a ready-to-use permutation scheme allowing the computation of adjusted P-values at low time cost. Additionally, the package is a drop-in replacement to previous packages as a compatibility mode is included, allowing to re-run older studies with close to no change to existing pipelines. RedRibbon holds the promise to accurately extricate detailed information from large analyses. Availability. RNA-sequencing datasets are available through the Gene Expression Omnibus (GEO) portal with accession numbers GSE159984, GSE133218, GSE137136, GSE98485, GSE148058 and GSE108413. The C libraries and R package code are open to the community with a permissive licence (GPL3) and available for download from GitHub https://github.com/antpiron/ale, https://github.com/antpiron/cRedRibbon and https://github.com/antpiron/RedRibbon. |
Han, The Anh T. A. H.; Lenaerts, Tom; Santos, Francisco C.; Pereira, Luís Moniz Voluntary safety commitments provide an escape from over-regulation in AI development Journal Article In: Technology in Society, vol. 68, 2022, (DOI: 10.1016/j.techsoc.2021.101843). @article{info:hdl:2013/339040,
title = {Voluntary safety commitments provide an escape from over-regulation in AI development},
author = {The Anh T. A. H. Han and Tom Lenaerts and Francisco C. Santos and Luís Moniz Pereira},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/339040/3/AIES_agreement-2.pdf},
year = {2022},
date = {2022-01-01},
journal = {Technology in Society},
volume = {68},
abstract = {With the introduction of Artificial Intelligence (AI) and related technologies in our daily lives, fear and anxiety about their misuse as well as their inherent biases, incorporated during their creation, have led to a demand for governance and associated regulation. Yet regulating an innovation process that is not well understood may stifle this process and reduce benefits that society may gain from the generated technology, even under the best intentions. Instruments to shed light on such processes are thus needed as they can ensure that imposed policies achieve the ambitions for which they were designed. Starting from a game-theoretical model that captures the fundamental dynamics of a race for domain supremacy using AI technology, we show how socially unwanted outcomes may be produced when sanctioning is applied unconditionally to risk-taking, i.e. potentially unsafe, behaviours. We demonstrate here the potential of a regulatory approach that combines a voluntary commitment approach reminiscent of soft law, wherein technologists have the freedom of choice between independently pursuing their course of actions or establishing binding agreements to act safely, with either a peer or governmental sanctioning system of those that do not abide by what they pledged. As commitments are binding and sanctioned, they go beyond the classic view of soft law, akin more closely to actual law-enforced regulation. Overall, this work reveals how voluntary but sanctionable commitments generate socially beneficial outcomes in all scenarios envisageable in a short-term race towards domain supremacy through AI technology. These results provide an original dynamic systems perspective of the governance potential of enforceable soft law techniques or co-regulatory mechanisms, showing how they may impact the ambitions of developers in the context of the AI-based applications.},
note = {DOI: 10.1016/j.techsoc.2021.101843},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
With the introduction of Artificial Intelligence (AI) and related technologies in our daily lives, fear and anxiety about their misuse as well as their inherent biases, incorporated during their creation, have led to a demand for governance and associated regulation. Yet regulating an innovation process that is not well understood may stifle this process and reduce benefits that society may gain from the generated technology, even under the best intentions. Instruments to shed light on such processes are thus needed as they can ensure that imposed policies achieve the ambitions for which they were designed. Starting from a game-theoretical model that captures the fundamental dynamics of a race for domain supremacy using AI technology, we show how socially unwanted outcomes may be produced when sanctioning is applied unconditionally to risk-taking, i.e. potentially unsafe, behaviours. We demonstrate here the potential of a regulatory approach that combines a voluntary commitment approach reminiscent of soft law, wherein technologists have the freedom of choice between independently pursuing their course of actions or establishing binding agreements to act safely, with either a peer or governmental sanctioning system of those that do not abide by what they pledged. As commitments are binding and sanctioned, they go beyond the classic view of soft law, akin more closely to actual law-enforced regulation. Overall, this work reveals how voluntary but sanctionable commitments generate socially beneficial outcomes in all scenarios envisageable in a short-term race towards domain supremacy through AI technology. These results provide an original dynamic systems perspective of the governance potential of enforceable soft law techniques or co-regulatory mechanisms, showing how they may impact the ambitions of developers in the context of the AI-based applications. |
Montero-Porras, Eladio; Lenaerts, Tom; Gallotti, Riccardo; Grujić, Jelena Fast deliberation is related to unconditional behaviour in iterated Prisoners’ Dilemma experiments Journal Article In: Scientific Reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-24849-4). @article{info:hdl:2013/366631b,
title = {Fast deliberation is related to unconditional behaviour in iterated Prisoners’ Dilemma experiments},
author = {Eladio Montero-Porras and Tom Lenaerts and Riccardo Gallotti and Jelena Grujić},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366631/1/doi_350275.pdf},
year = {2022},
date = {2022-01-01},
journal = {Scientific Reports},
volume = {12},
number = {1},
abstract = {People have different preferences for what they allocate for themselves and what they allocate to others in social dilemmas. These differences result from contextual reasons, intrinsic values, and social expectations. What is still an area of debate is whether these differences can be estimated from differences in each individual’s deliberation process. In this work, we analyse the participants’ reaction times in three different experiments of the Iterated Prisoner’s Dilemma with the Drift Diffusion Model, which links response times to the perceived difficulty of the decision task, the rate of accumulation of information (deliberation), and the intuitive attitudes towards the choices. The correlation between these results and the attitude of the participants towards the allocation of resources is then determined. We observe that individuals who allocated resources equally are correlated with more deliberation than highly cooperative or highly defective participants, who accumulate evidence more quickly to reach a decision. Also, the evidence collection is faster in fixed neighbour settings than in shuffled ones. Consequently, fast decisions do not distinguish cooperators from defectors in these experiments, but appear to separate those that are more reactive to the behaviour of others from those that act categorically.},
note = {DOI: 10.1038/s41598-022-24849-4},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
People have different preferences for what they allocate for themselves and what they allocate to others in social dilemmas. These differences result from contextual reasons, intrinsic values, and social expectations. What is still an area of debate is whether these differences can be estimated from differences in each individual’s deliberation process. In this work, we analyse the participants’ reaction times in three different experiments of the Iterated Prisoner’s Dilemma with the Drift Diffusion Model, which links response times to the perceived difficulty of the decision task, the rate of accumulation of information (deliberation), and the intuitive attitudes towards the choices. The correlation between these results and the attitude of the participants towards the allocation of resources is then determined. We observe that individuals who allocated resources equally are correlated with more deliberation than highly cooperative or highly defective participants, who accumulate evidence more quickly to reach a decision. Also, the evidence collection is faster in fixed neighbour settings than in shuffled ones. Consequently, fast decisions do not distinguish cooperators from defectors in these experiments, but appear to separate those that are more reactive to the behaviour of others from those that act categorically. |
Montero-Porras, Eladio; Grujić, Jelena; Domingos, Elias Fernandez; Lenaerts, Tom Inferring strategies from observations in long iterated Prisoner’s dilemma experiments Journal Article In: Scientific Reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-11654-2). @article{info:hdl:2013/344327c,
title = {Inferring strategies from observations in long iterated Prisoner’s dilemma experiments},
author = {Eladio Montero-Porras and Jelena Grujić and Elias Fernandez Domingos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/344327/1/doi_327971.pdf},
year = {2022},
date = {2022-01-01},
journal = {Scientific Reports},
volume = {12},
number = {1},
abstract = {While many theoretical studies have revealed the strategies that could lead to and maintain cooperation in the Iterated Prisoner’s dilemma, less is known about what human participants actually do in this game and how strategies change when being confronted with anonymous partners in each round. Previous attempts used short experiments, made different assumptions of possible strategies, and led to very different conclusions. We present here two long treatments that differ in the partner matching strategy used, i.e. fixed or shuffled partners. Here we use unsupervised methods to cluster the players based on their actions and then Hidden Markov Model to infer what the memory-one strategies are in each cluster. Analysis of the inferred strategies reveals that fixed partner interaction leads to behavioral self-organization. Shuffled partners generate subgroups of memory-one strategies that remain entangled, apparently blocking the self-selection process that leads to fully cooperating participants in the fixed partner treatment. Analyzing the latter in more detail shows that AllC, AllD, TFT- and WSLS-like behavior can be observed. This study also reveals that long treatments are needed as experiments with less than 25 rounds capture mostly the learning phase participants go through in these kinds of experiments.},
note = {DOI: 10.1038/s41598-022-11654-2},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
While many theoretical studies have revealed the strategies that could lead to and maintain cooperation in the Iterated Prisoner’s dilemma, less is known about what human participants actually do in this game and how strategies change when being confronted with anonymous partners in each round. Previous attempts used short experiments, made different assumptions of possible strategies, and led to very different conclusions. We present here two long treatments that differ in the partner matching strategy used, i.e. fixed or shuffled partners. Here we use unsupervised methods to cluster the players based on their actions and then Hidden Markov Model to infer what the memory-one strategies are in each cluster. Analysis of the inferred strategies reveals that fixed partner interaction leads to behavioral self-organization. Shuffled partners generate subgroups of memory-one strategies that remain entangled, apparently blocking the self-selection process that leads to fully cooperating participants in the fixed partner treatment. Analyzing the latter in more detail shows that AllC, AllD, TFT- and WSLS-like behavior can be observed. This study also reveals that long treatments are needed as experiments with less than 25 rounds capture mostly the learning phase participants go through in these kinds of experiments. |
Domingos, Elias Fernandez; Terrucha, Ines; Suchon, Remi; Grujić, Jelena; Burguillo, Juan J. C.; Santos, Francisco C.; Lenaerts, Tom Delegation to artificial agents fosters prosocial behaviors in the collective risk dilemma Journal Article In: Scientific Reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-11518-9). @article{info:hdl:2013/349554b,
title = {Delegation to artificial agents fosters prosocial behaviors in the collective risk dilemma},
author = {Elias Fernandez Domingos and Ines Terrucha and Remi Suchon and Jelena Grujić and Juan J. C. Burguillo and Francisco C. Santos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/349554/1/doi_333198.pdf},
year = {2022},
date = {2022-01-01},
journal = {Scientific Reports},
volume = {12},
number = {1},
abstract = {Home assistant chat-bots, self-driving cars, drones or automated negotiation systems are some of the several examples of autonomous (artificial) agents that have pervaded our society. These agents enable the automation of multiple tasks, saving time and (human) effort. However, their presence in social settings raises the need for a better understanding of their effect on social interactions and how they may be used to enhance cooperation towards the public good, instead of hindering it. To this end, we present an experimental study of human delegation to autonomous agents and hybrid human-agent interactions centered on a non-linear public goods dilemma with uncertain returns in which participants face a collective risk. Our aim is to understand experimentally whether the presence of autonomous agents has a positive or negative impact on social behaviour, equality and cooperation in such a dilemma. Our results show that cooperation and group success increases when participants delegate their actions to an artificial agent that plays on their behalf. Yet, this positive effect is less pronounced when humans interact in hybrid human-agent groups, where we mostly observe that humans in successful hybrid groups make higher contributions earlier in the game. Also, we show that participants wrongly believe that artificial agents will contribute less to the collective effort. In general, our results suggest that delegation to autonomous agents has the potential to work as commitment devices, which prevent both the temptation to deviate to an alternate (less collectively good) course of action, as well as limiting responses based on betrayal aversion.},
note = {DOI: 10.1038/s41598-022-11518-9},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Home assistant chat-bots, self-driving cars, drones or automated negotiation systems are some of the several examples of autonomous (artificial) agents that have pervaded our society. These agents enable the automation of multiple tasks, saving time and (human) effort. However, their presence in social settings raises the need for a better understanding of their effect on social interactions and how they may be used to enhance cooperation towards the public good, instead of hindering it. To this end, we present an experimental study of human delegation to autonomous agents and hybrid human-agent interactions centered on a non-linear public goods dilemma with uncertain returns in which participants face a collective risk. Our aim is to understand experimentally whether the presence of autonomous agents has a positive or negative impact on social behaviour, equality and cooperation in such a dilemma. Our results show that cooperation and group success increases when participants delegate their actions to an artificial agent that plays on their behalf. Yet, this positive effect is less pronounced when humans interact in hybrid human-agent groups, where we mostly observe that humans in successful hybrid groups make higher contributions earlier in the game. Also, we show that participants wrongly believe that artificial agents will contribute less to the collective effort. In general, our results suggest that delegation to autonomous agents has the potential to work as commitment devices, which prevent both the temptation to deviate to an alternate (less collectively good) course of action, as well as limiting responses based on betrayal aversion. |
Piron, Anthony; Colli, Maikel Luis; Defrance, Matthieu; Eizirik, Decio L.; Mercader, Josep Maria; Cnop, Miriam Identification of novel type 1 and type 2 diabetes genes by colocalisation of human islet eQTL and GWAS variants Miscellaneous 2022, (Conference: EASD Annual Meeting of the European Association for the Study of Diabetes(58th: 19 - 23 September 2022: Stockholm, Sweden)).
@misc{info:hdl:2013/353214b,
  title      = {Identification of novel type 1 and type 2 diabetes genes by colocalisation of human islet eQTL and GWAS variants},
  author     = {Anthony Piron and Maikel Luis Colli and Matthieu Defrance and Decio L. Eizirik and Josep Maria Mercader and Miriam Cnop},
  url        = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353214/1/doi_336858.pdf},
  year       = {2022},
  date       = {2022-01-01},
  note       = {Conference: EASD Annual Meeting of the European Association for the Study of Diabetes(58th: 19 - 23 September 2022: Stockholm, Sweden)},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc}
}
|
Piron, Anthony; Szymczak, Florian; Alvelos, Maria De Oliveira; Defrance, Matthieu; Lenaerts, Tom; Eizirik, Decio L.; Cnop, Miriam RedRibbon: A new rank-rank hypergeometric overlap pipeline to compare gene and transcript expression signatures Journal Article In: BioRxiv, 2022, (DOI: 10.1101/2022.08.31.505818). @article{info:hdl:2013/353212c,
title = {RedRibbon: A new rank-rank hypergeometric overlap pipeline to compare gene and transcript expression signatures},
author = {Anthony Piron and Florian Szymczak and Maria De Oliveira Alvelos and Matthieu Defrance and Tom Lenaerts and Decio L. Eizirik and Miriam Cnop},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353212/3/2022.08.31.505818v1.full.pdf},
year = {2022},
date = {2022-01-01},
journal = {BioRxiv},
abstract = {Motivation. High throughput omics technologies have generated a wealth of large protein, gene and transcript datasets that have exacerbated the need for new methods to analyse and compare big datasets. Rank-rank hypergeometric overlap is an important threshold-free method to combine and visualize two ranked lists of P-values or fold-changes, usually from differential gene expression analyses. Here, we introduce a new rank-rank hypergeometric overlap-based method aimed at both gene level and alternative splicing analyses at transcript or exon level, hitherto unreachable as transcript numbers are an order of magnitude larger than gene numbers. Results. We tested the tool on synthetic and real datasets at gene and transcript levels to detect correlation and anti-correlation patterns and found it to be fast and accurate, even on very large datasets thanks to an evolutionary algorithm based minimal P-value search. The tool comes with a ready-to-use permutation scheme allowing the computation of adjusted P-values at low time cost. Additionally, the package is a drop-in replacement to previous packages as a compatibility mode is included, allowing to re-run older studies with close to no change to existing pipelines. RedRibbon holds the promise to accurately extricate detailed information from large analyses. Availability. RNA-sequencing datasets are available through the Gene Expression Omnibus (GEO) portal with accession numbers GSE159984, GSE133218, GSE137136, GSE98485, GSE148058 and GSE108413. The C libraries and R package code are open to the community with a permissive licence (GPL3) and available for download from GitHub https://github.com/antpiron/ale, https://github.com/antpiron/cRedRibbon and https://github.com/antpiron/RedRibbon.},
note = {DOI: 10.1101/2022.08.31.505818},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Motivation. High throughput omics technologies have generated a wealth of large protein, gene and transcript datasets that have exacerbated the need for new methods to analyse and compare big datasets. Rank-rank hypergeometric overlap is an important threshold-free method to combine and visualize two ranked lists of P-values or fold-changes, usually from differential gene expression analyses. Here, we introduce a new rank-rank hypergeometric overlap-based method aimed at both gene level and alternative splicing analyses at transcript or exon level, hitherto unreachable as transcript numbers are an order of magnitude larger than gene numbers. Results. We tested the tool on synthetic and real datasets at gene and transcript levels to detect correlation and anti-correlation patterns and found it to be fast and accurate, even on very large datasets thanks to an evolutionary algorithm based minimal P-value search. The tool comes with a ready-to-use permutation scheme allowing the computation of adjusted P-values at low time cost. Additionally, the package is a drop-in replacement to previous packages as a compatibility mode is included, allowing to re-run older studies with close to no change to existing pipelines. RedRibbon holds the promise to accurately extricate detailed information from large analyses. Availability. RNA-sequencing datasets are available through the Gene Expression Omnibus (GEO) portal with accession numbers GSE159984, GSE133218, GSE137136, GSE98485, GSE148058 and GSE108413. The C libraries and R package code are open to the community with a permissive licence (GPL3) and available for download from GitHub https://github.com/antpiron/ale, https://github.com/antpiron/cRedRibbon and https://github.com/antpiron/RedRibbon. |
Ciortan, Madalina; Defrance, Matthieu GNN-based embedding for clustering scRNA-seq data Journal Article In: Bioinformatics, vol. 38, no. 4, pp. 1037-1044, 2022, (DOI: 10.1093/bioinformatics/btab787). @article{info:hdl:2013/343811c,
title = {GNN-based embedding for clustering scRNA-seq data},
author = {Madalina Ciortan and Matthieu Defrance},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/343811/3/btab787.pdf},
year = {2022},
date = {2022-01-01},
journal = {Bioinformatics},
volume = {38},
number = {4},
pages = {1037-1044},
abstract = {Motivation: Single-cell RNA sequencing (scRNA-seq) provides transcriptomic profiling for individual cells, allowing researchers to study the heterogeneity of tissues, recognize rare cell identities and discover new cellular subtypes. Clustering analysis is usually used to predict cell class assignments and infer cell identities. However, the high sparsity of scRNA-seq data, accentuated by dropout events generates challenges that have motivated the development of numerous dedicated clustering methods. Nevertheless, there is still no consensus on the best performing method. Results: graph-sc is a new method leveraging a graph autoencoder network to create embeddings for scRNA-seq cell data. While this work analyzes the performance of clustering the embeddings with various clustering algorithms, other downstream tasks can also be performed. A broad experimental study has been performed on both simulated and scRNA-seq datasets. The results indicate that although there is no consistently best method across all the analyzed datasets, graph-sc compares favorably to competing techniques across all types of datasets. Furthermore, the proposed method is stable across consecutive runs, robust to input down-sampling, generally insensitive to changes in the network architecture or training parameters and more computationally efficient than other competing methods based on neural networks. Modeling the data as a graph provides increased flexibility to define custom features characterizing the genes, the cells and their interactions. Moreover, external data (e.g. gene network) can easily be integrated into the graph and used seamlessly under the same optimization task. Availability and implementation: https://github.com/ciortanmadalina/graph-sc. Supplementary information: Supplementary data are available at Bioinformatics online.},
note = {DOI: 10.1093/bioinformatics/btab787},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Motivation: Single-cell RNA sequencing (scRNA-seq) provides transcriptomic profiling for individual cells, allowing researchers to study the heterogeneity of tissues, recognize rare cell identities and discover new cellular subtypes. Clustering analysis is usually used to predict cell class assignments and infer cell identities. However, the high sparsity of scRNA-seq data, accentuated by dropout events generates challenges that have motivated the development of numerous dedicated clustering methods. Nevertheless, there is still no consensus on the best performing method. Results: graph-sc is a new method leveraging a graph autoencoder network to create embeddings for scRNA-seq cell data. While this work analyzes the performance of clustering the embeddings with various clustering algorithms, other downstream tasks can also be performed. A broad experimental study has been performed on both simulated and scRNA-seq datasets. The results indicate that although there is no consistently best method across all the analyzed datasets, graph-sc compares favorably to competing techniques across all types of datasets. Furthermore, the proposed method is stable across consecutive runs, robust to input down-sampling, generally insensitive to changes in the network architecture or training parameters and more computationally efficient than other competing methods based on neural networks. Modeling the data as a graph provides increased flexibility to define custom features characterizing the genes, the cells and their interactions. Moreover, external data (e.g. gene network) can easily be integrated into the graph and used seamlessly under the same optimization task. Availability and implementation: https://github.com/ciortanmadalina/graph-sc. Supplementary information: Supplementary data are available at Bioinformatics online. |
Rivière, Quentin; Corso, Massimiliano; Ciortan, Madalina; Noël, Grégoire; Verbruggen, Nathalie; Defrance, Matthieu Exploiting Genomic Features to Improve the Prediction of Transcription Factor-Binding Sites in Plants. Journal Article In: Plant and Cell Physiology, vol. 63, no. 10, pp. 1457-1473, 2022, (DOI: 10.1093/pcp/pcac095). @article{info:hdl:2013/352290b,
title = {Exploiting Genomic Features to Improve the Prediction of Transcription Factor-Binding Sites in Plants.},
author = {Quentin Rivière and Massimiliano Corso and Madalina Ciortan and Grégoire Noël and Nathalie Verbruggen and Matthieu Defrance},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/352290/3/Riviere_et_al.pdf},
year = {2022},
date = {2022-01-01},
journal = {Plant and Cell Physiology},
volume = {63},
number = {10},
pages = {1457-1473},
abstract = {The identification of transcription factor (TF) target genes is central in biology. A popular approach is based on the location by pattern matching of potential cis-regulatory elements (CREs). During the last few years, tools integrating next-generation sequencing data have been developed to improve the performance of pattern matching. However, such tools have not yet been comprehensively evaluated in plants. Hence, we developed a new streamlined method aiming at predicting CREs and target genes of plant TFs in specific organs or conditions. Our approach implements a supervised machine learning strategy, which allows decision rule models to be learnt using TF ChIP-chip/seq experimental data. Different layers of genomic features were integrated in predictive models: the position on the gene, the DNA sequence conservation, the chromatin state and various CRE footprints. Among the tested features, the chromatin features were crucial for improving the accuracy of the method. Furthermore, we evaluated the transferability of predictive models across TFs, organs and species. Finally, we validated our method by correctly inferring the target genes of key TFs controlling metabolite biosynthesis at the organ level in Arabidopsis. We developed a tool—Wimtrap—to reproduce our approach in plant species and conditions/organs for which ChIP-chip/seq data are available. Wimtrap is a user-friendly R package that supports an R Shiny web interface and is provided with pre-built models that can be used to quickly get predictions of CREs and TF gene targets in different organs or conditions in Arabidopsis thaliana, Solanum lycopersicum, Oryza sativa and Zea mays.},
note = {DOI: 10.1093/pcp/pcac095},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
The identification of transcription factor (TF) target genes is central in biology. A popular approach is based on the location by pattern matching of potential cis-regulatory elements (CREs). During the last few years, tools integrating next-generation sequencing data have been developed to improve the performance of pattern matching. However, such tools have not yet been comprehensively evaluated in plants. Hence, we developed a new streamlined method aiming at predicting CREs and target genes of plant TFs in specific organs or conditions. Our approach implements a supervised machine learning strategy, which allows decision rule models to be learnt using TF ChIP-chip/seq experimental data. Different layers of genomic features were integrated in predictive models: the position on the gene, the DNA sequence conservation, the chromatin state and various CRE footprints. Among the tested features, the chromatin features were crucial for improving the accuracy of the method. Furthermore, we evaluated the transferability of predictive models across TFs, organs and species. Finally, we validated our method by correctly inferring the target genes of key TFs controlling metabolite biosynthesis at the organ level in Arabidopsis. We developed a tool—Wimtrap—to reproduce our approach in plant species and conditions/organs for which ChIP-chip/seq data are available. Wimtrap is a user-friendly R package that supports an R Shiny web interface and is provided with pre-built models that can be used to quickly get predictions of CREs and TF gene targets in different organs or conditions in Arabidopsis thaliana, Solanum lycopersicum, Oryza sativa and Zea mays. |
Bizet, Martin; Defrance, Matthieu; Calonne, Emilie; Bontempi, Gianluca; Sotiriou, Christos; Fuks, François; Jeschke, Jana Improving Infinium MethylationEPIC data processing: re-annotation of enhancers and long noncoding RNA genes and benchmarking of normalization methods. Journal Article In: Epigenetics, vol. 17, no. 13, pp. 2434-2454, 2022, (DOI: 10.1080/15592294.2022.2135201). @article{info:hdl:2013/353467d,
title = {Improving Infinium MethylationEPIC data processing: re-annotation of enhancers and long noncoding RNA genes and benchmarking of normalization methods.},
author = {Martin Bizet and Matthieu Defrance and Emilie Calonne and Gianluca Bontempi and Christos Sotiriou and François Fuks and Jana Jeschke},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353467/5/KEPI_17_2135201.pdf},
year = {2022},
date = {2022-01-01},
journal = {Epigenetics},
volume = {17},
number = {13},
pages = {2434-2454},
abstract = {Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease.},
note = {DOI: 10.1080/15592294.2022.2135201},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease. |
Grolaux, Robin; Hardy, Alexis; Olsen, Catharina; Van Dooren, Sonia; Smits, Guillaume; Defrance, Matthieu Identification of differentially methylated regions in rare diseases from a single-patient perspective Journal Article In: Clinical Epigenetics, vol. 14, no. 1, 2022, (DOI: 10.1186/s13148-022-01403-7). @article{info:hdl:2013/353081b,
title = {Identification of differentially methylated regions in rare diseases from a single-patient perspective},
author = {Robin Grolaux and Alexis Hardy and Catharina Olsen and Van Dooren, Sonia and Guillaume Smits and Matthieu Defrance},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353081/1/doi_336725.pdf},
year = {2022},
date = {2022-01-01},
journal = {Clinical Epigenetics},
volume = {14},
number = {1},
abstract = {Background: DNA methylation (5-mC) is being widely recognized as an alternative in the detection of sequence variants in the diagnosis of some rare neurodevelopmental and imprinting disorders. Identification of alterations in DNA methylation plays an important role in the diagnosis and understanding of the etiology of those disorders. Canonical pipelines for the detection of differentially methylated regions (DMRs) usually rely on inter-group (e.g., case versus control) comparisons. However, these tools might perform suboptimally in the context of rare diseases and multilocus imprinting disturbances due to small cohort sizes and inter-patient heterogeneity. Therefore, there is a need to provide a simple but statistically robust pipeline for scientists and clinicians to perform differential methylation analyses at the single patient level as well as to evaluate how parameter fine-tuning may affect differentially methylated region detection. Result: We implemented an improved statistical method to detect differentially methylated regions in correlated datasets based on the Z-score and empirical Brown aggregation methods from a single-patient perspective. To accurately assess the predictive power of our method, we generated semi-simulated data using a public control population of 521 samples and investigated how the size of the control population, methylation difference, and region size affect DMR detection. In addition, we validated the detection of methylation events in patients suffering from rare multi-locus imprinting disturbance and evaluated how this method could complement existing tools in the context of clinical diagnosis. Conclusion: In this study, we present a robust statistical method to perform differential methylation analysis at the single patient level and describe its optimal parameters to increase DMRs identification performance. Finally, we show its diagnostic utility when applied to rare disorders.},
note = {DOI: 10.1186/s13148-022-01403-7},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Background: DNA methylation (5-mC) is being widely recognized as an alternative in the detection of sequence variants in the diagnosis of some rare neurodevelopmental and imprinting disorders. Identification of alterations in DNA methylation plays an important role in the diagnosis and understanding of the etiology of those disorders. Canonical pipelines for the detection of differentially methylated regions (DMRs) usually rely on inter-group (e.g., case versus control) comparisons. However, these tools might perform suboptimally in the context of rare diseases and multilocus imprinting disturbances due to small cohort sizes and inter-patient heterogeneity. Therefore, there is a need to provide a simple but statistically robust pipeline for scientists and clinicians to perform differential methylation analyses at the single patient level as well as to evaluate how parameter fine-tuning may affect differentially methylated region detection. Result: We implemented an improved statistical method to detect differentially methylated regions in correlated datasets based on the Z-score and empirical Brown aggregation methods from a single-patient perspective. To accurately assess the predictive power of our method, we generated semi-simulated data using a public control population of 521 samples and investigated how the size of the control population, methylation difference, and region size affect DMR detection. In addition, we validated the detection of methylation events in patients suffering from rare multi-locus imprinting disturbance and evaluated how this method could complement existing tools in the context of clinical diagnosis. Conclusion: In this study, we present a robust statistical method to perform differential methylation analysis at the single patient level and describe its optimal parameters to increase DMRs identification performance. Finally, we show its diagnostic utility when applied to rare disorders. |
Marquis, Bastien; Jansen, Maarten Information criteria bias correction for group selection Journal Article In: Statistical papers, vol. 63, no. 5, pp. 1387-1414, 2022, (Language of publication: fr). @article{info:hdl:2013/335472c,
  title     = {Information criteria bias correction for group selection},
  author    = {Bastien Marquis and Maarten Jansen},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/335472/3/marquis22groupmirror.pdf},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {Statistical papers},
  volume    = {63},
  number    = {5},
  pages     = {1387-1414},
  note      = {Language of publication: fr},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Jansen, Maarten Wavelets from a Statistical Perspective Book CRC Press, 2022, (Language of publication: fr). @book{info:hdl:2013/333285,
  title     = {Wavelets from a Statistical Perspective},
  author    = {Maarten Jansen},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/333285/3/jansen21waveletstatCRCanswers.pdf},
  year      = {2022},
  date      = {2022-01-01},
  publisher = {CRC Press},
  note      = {Language of publication: fr},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {book}
}
|
Simar, Cédric; Petit, Robin; Bozga, Nichita; Leroy, Axelle; Alvarez, Ana Maria Cebolla; Petieau, Mathieu; Bontempi, Gianluca; Chéron, Guy Riemannian classification of single-trial surface EEG and sources during checkerboard and navigational images in humans. Journal Article In: PloS one, vol. 17, no. 1, pp. e0262417, 2022, (DOI: 10.1371/journal.pone.0262417). @article{info:hdl:2013/366038b,
  title     = {Riemannian classification of single-trial surface EEG and sources during checkerboard and navigational images in humans.},
  author    = {Cédric Simar and Robin Petit and Nichita Bozga and Axelle Leroy and Ana Maria Cebolla Alvarez and Mathieu Petieau and Gianluca Bontempi and Guy Chéron},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366038/1/doi_349682.pdf},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {PloS one},
  volume    = {17},
  number    = {1},
  pages     = {e0262417},
  abstract  = {Different visual stimuli are classically used for triggering visual evoked potentials comprising well-defined components linked to the content of the displayed image. These evoked components result from the average of ongoing EEG signals in which additive and oscillatory mechanisms contribute to the component morphology. The evoked related potentials often resulted from a mixed situation (power variation and phase-locking) making basic and clinical interpretations difficult. Besides, the grand average methodology produced artificial constructs that do not reflect individual peculiarities. This motivated new approaches based on single-trial analysis as recently used in the brain-computer interface field.},
  note      = {DOI: 10.1371/journal.pone.0262417},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Different visual stimuli are classically used for triggering visual evoked potentials comprising well-defined components linked to the content of the displayed image. These evoked components result from the average of ongoing EEG signals in which additive and oscillatory mechanisms contribute to the component morphology. The evoked related potentials often resulted from a mixed situation (power variation and phase-locking) making basic and clinical interpretations difficult. Besides, the grand average methodology produced artificial constructs that do not reflect individual peculiarities. This motivated new approaches based on single-trial analysis as recently used in the brain-computer interface field. |
Bizet, Martin; Defrance, Matthieu; Calonne, Emilie; Bontempi, Gianluca; Sotiriou, Christos; Fuks, François; Jeschke, Jana Improving Infinium MethylationEPIC data processing: re-annotation of enhancers and long noncoding RNA genes and benchmarking of normalization methods. Journal Article In: Epigenetics, vol. 17, no. 13, pp. 2434-2454, 2022, (DOI: 10.1080/15592294.2022.2135201). @article{info:hdl:2013/353467c,
title = {Improving Infinium MethylationEPIC data processing: re-annotation of enhancers and long noncoding RNA genes and benchmarking of normalization methods.},
author = {Martin Bizet and Matthieu Defrance and Emilie Calonne and Gianluca Bontempi and Christos Sotiriou and François Fuks and Jana Jeschke},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353467/5/KEPI_17_2135201.pdf},
year = {2022},
date = {2022-01-01},
journal = {Epigenetics},
volume = {17},
number = {13},
pages = {2434-2454},
abstract = {Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease.},
note = {DOI: 10.1080/15592294.2022.2135201},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease. |
Montero-Porras, Eladio; Grujić, Jelena; Domingos, Elias Fernandez; Lenaerts, Tom Inferring Strategies from Observations in Long Iterated Prisoner’s Dilemma Experiments Miscellaneous 2022, (Conference: International Conference on Social Dilemmas(19-22/07/2022: Copenhagen, Denmark)). @misc{info:hdl:2013/366679,
title = {Inferring Strategies from Observations in Long Iterated Prisoner’s Dilemma Experiments},
author = {Eladio Montero-Porras and Jelena Grujić and Elias Fernandez Domingos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366679/3/Inferring_Strategies.pdf},
year = {2022},
date = {2022-01-01},
note = {Conference: International Conference on Social Dilemmas(19-22/07/2022: Copenhagen, Denmark)},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Versbraegen, Nassim; Gravel, Barbara; Nachtegael, Charlotte; Renaux, Alexandre; Verkinderen, Emma; Nowé, Ann; Lenaerts, Tom; Papadimitriou, Sofia Taking the prediction of pathogenic variant-combinations to the next level with VarCoPP2.0 Miscellaneous 2022, (Conference: European Conference on Computational Biology (21: 12-21 September 2022: Sitges, Barcelona)). @misc{info:hdl:2013/352566,
  title     = {Taking the prediction of pathogenic variant-combinations to the next level with VarCoPP2.0},
  author    = {Nassim Versbraegen and Barbara Gravel and Charlotte Nachtegael and Alexandre Renaux and Emma Verkinderen and Ann Nowé and Tom Lenaerts and Sofia Papadimitriou},
  url       = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/352566},
  year      = {2022},
  date      = {2022-01-01},
  note      = {Conference: European Conference on Computational Biology (21: 12-21 September 2022: Sitges, Barcelona)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
|
Montero-Porras, Eladio; Grujić, Jelena; Domingos, Elias Fernandez; Lenaerts, Tom Inferring Strategies from Observations in Long Iterated Prisoner’s Dilemma Experiments Miscellaneous 2022, (Conference: Complex Systems Conference 2022(17-21/10/2022: Palma de Mallorca, Spain)). @misc{info:hdl:2013/366678,
title = {Inferring Strategies from Observations in Long Iterated Prisoner’s Dilemma Experiments},
author = {Eladio Montero-Porras and Jelena Grujić and Elias Fernandez Domingos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366678/3/Inferring_Strategies.pdf},
year = {2022},
date = {2022-01-01},
note = {Conference: Complex Systems Conference 2022(17-21/10/2022: Palma de Mallorca, Spain)},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Abels, Axel; Lenaerts, Tom; Trianni, Vito; Nowé, Ann A New Approach to Handle Non-Stationarity in Collective Decision-Making Miscellaneous 2022, (Conference: ACM Collective Intelligence conference (CI)(Virtual)). @misc{info:hdl:2013/366666,
  title     = {A New Approach to Handle Non-Stationarity in Collective Decision-Making},
  author    = {Axel Abels and Tom Lenaerts and Vito Trianni and Ann Nowé},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366666/3/CI2022-Non-Stationarity.pdf},
  year      = {2022},
  date      = {2022-01-01},
  note      = {Conference: ACM Collective Intelligence conference (CI)(Virtual)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
|
Renaux, Alexandre; Terwagne, Chloé CT; Cochez, Michael; Tiddi, Ilaria; Nowé, Ann; Lenaerts, Tom A knowledge graph approach for interpretable prediction of pathogenic genetic interactions Miscellaneous 2022, (Conference: European Conference on Computational Biology (ECCB) 2022 (2022-07: Sitges, Spain)). @misc{info:hdl:2013/352608,
  title     = {A knowledge graph approach for interpretable prediction of pathogenic genetic interactions},
  author    = {Alexandre Renaux and Chloé CT Terwagne and Michael Cochez and Ilaria Tiddi and Ann Nowé and Tom Lenaerts},
  url       = {https://dipot.ulb.ac.be/dspace/bitstream/2013/352608/3/f1000research-467591.pdf},
  year      = {2022},
  date      = {2022-01-01},
  abstract  = {An increasing number of clinical studies are reporting patterns of oligogenic inheritance in genetic diseases. Despite the advent of methods able to predict the pathogenicity of variant combinations, the underlying biological mechanisms remain unknown, since these models offer limited interpretability. To advance towards a better understanding of oligogenic disease aetiology, we developed a new interpretable predictive method based on a knowledge graph. This heterogenous network integrates curated oligogenic combinations together with multiple biological networks and biomedical ontologies. Our approach successfully captures association rules solely based on multi-hop relationships between genes. It combines them as a decision set model which can predict the pathogenicity of new gene pairs. These predictions come with explanations, obtained by querying the knowledge graph, which highlight relevant paths. The benchmarking of this model in a cross-validation setting achieves high accuracy and recalls independent gene pairs from recently published digenic combinations. The analysis of the rule-based paths highlights relevant contributors to the disease and shows the ability of this approach to generate knowledge-based hypotheses to investigate new disease mechanisms.},
  note      = {Conference: European Conference on Computational Biology (ECCB) 2022 (2022-07: Sitges, Spain)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
An increasing number of clinical studies are reporting patterns of oligogenic inheritance in genetic diseases. Despite the advent of methods able to predict the pathogenicity of variant combinations, the underlying biological mechanisms remain unknown, since these models offer limited interpretability. To advance towards a better understanding of oligogenic disease aetiology, we developed a new interpretable predictive method based on a knowledge graph. This heterogenous network integrates curated oligogenic combinations together with multiple biological networks and biomedical ontologies. Our approach successfully captures association rules solely based on multi-hop relationships between genes. It combines them as a decision set model which can predict the pathogenicity of new gene pairs. These predictions come with explanations, obtained by querying the knowledge graph, which highlight relevant paths. The benchmarking of this model in a cross-validation setting achieves high accuracy and recalls independent gene pairs from recently published digenic combinations. The analysis of the rule-based paths highlights relevant contributors to the disease and shows the ability of this approach to generate knowledge-based hypotheses to investigate new disease mechanisms. |
Nachtegael, Charlotte; Gravel, Barbara; Dillen, Arnau; Smits, Guillaume; Nowé, Ann; Papadimitriou, Sofia; Lenaerts, Tom Scaling up oligogenic diseases research with OLIDA: The Oligogenic Diseases Database Journal Article In: Database, vol. 2022, 2022, (DOI: 10.1093/database/baac023). @article{info:hdl:2013/342417b,
title = {Scaling up oligogenic diseases research with OLIDA: The Oligogenic Diseases Database},
author = {Charlotte Nachtegael and Barbara Gravel and Arnau Dillen and Guillaume Smits and Ann Nowé and Sofia Papadimitriou and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/342417/3/baac023.pdf},
year = {2022},
date = {2022-01-01},
journal = {Database},
volume = {2022},
abstract = {Improving the understanding of the oligogenic nature of diseases requires access to high-quality, well-curated Findable, Accessible, Interoperable, Reusable (FAIR) data. Although first steps were taken with the development of the Digenic Diseases Database, leading to novel computational advancements to assist the field, these were also linked with a number of limitations, for instance, the ad hoc curation protocol and the inclusion of only digenic cases. The OLIgogenic diseases DAtabase (OLIDA) presents a novel, transparent and rigorous curation protocol, introducing a confidence scoring mechanism for the published oligogenic literature. The application of this protocol on the oligogenic literature generated a new repository containing 916 oligogenic variant combinations linked to 159 distinct diseases. Information extracted from the scientific literature is supplemented with current knowledge support obtained from public databases. Each entry is an oligogenic combination linked to a disease, labelled with a confidence score based on the level of genetic and functional evidence that supports its involvement in this disease. These scores allow users to assess the relevance and proof of pathogenicity of each oligogenic combination in the database, constituting markers for reporting improvements on disease-causing oligogenic variant combinations. OLIDA follows the FAIR principles, providing detailed documentation, easy data access through its application programming interface and website, use of unique identifiers and links to existing ontologies. Database URL: https://olida.ibsquare.be},
note = {DOI: 10.1093/database/baac023},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Improving the understanding of the oligogenic nature of diseases requires access to high-quality, well-curated Findable, Accessible, Interoperable, Reusable (FAIR) data. Although first steps were taken with the development of the Digenic Diseases Database, leading to novel computational advancements to assist the field, these were also linked with a number of limitations, for instance, the ad hoc curation protocol and the inclusion of only digenic cases. The OLIgogenic diseases DAtabase (OLIDA) presents a novel, transparent and rigorous curation protocol, introducing a confidence scoring mechanism for the published oligogenic literature. The application of this protocol on the oligogenic literature generated a new repository containing 916 oligogenic variant combinations linked to 159 distinct diseases. Information extracted from the scientific literature is supplemented with current knowledge support obtained from public databases. Each entry is an oligogenic combination linked to a disease, labelled with a confidence score based on the level of genetic and functional evidence that supports its involvement in this disease. These scores allow users to assess the relevance and proof of pathogenicity of each oligogenic combination in the database, constituting markers for reporting improvements on disease-causing oligogenic variant combinations. OLIDA follows the FAIR principles, providing detailed documentation, easy data access through its application programming interface and website, use of unique identifiers and links to existing ontologies. Database URL: https://olida.ibsquare.be |
Montero-Porras, Eladio; Lenaerts, Tom; Gallotti, Riccardo; Grujić, Jelena Fast deliberation is related to unconditional behaviour in iterated Prisoners’ Dilemma experiments Journal Article In: Scientific Reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-24849-4). @article{info:hdl:2013/366631,
title = {Fast deliberation is related to unconditional behaviour in iterated Prisoners’ Dilemma experiments},
author = {Eladio Montero-Porras and Tom Lenaerts and Riccardo Gallotti and Jelena Grujić},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/366631/1/doi_350275.pdf},
year = {2022},
date = {2022-01-01},
journal = {Scientific Reports},
volume = {12},
number = {1},
abstract = {People have different preferences for what they allocate for themselves and what they allocate to others in social dilemmas. These differences result from contextual reasons, intrinsic values, and social expectations. What is still an area of debate is whether these differences can be estimated from differences in each individual’s deliberation process. In this work, we analyse the participants’ reaction times in three different experiments of the Iterated Prisoner’s Dilemma with the Drift Diffusion Model, which links response times to the perceived difficulty of the decision task, the rate of accumulation of information (deliberation), and the intuitive attitudes towards the choices. The correlation between these results and the attitude of the participants towards the allocation of resources is then determined. We observe that individuals who allocated resources equally are correlated with more deliberation than highly cooperative or highly defective participants, who accumulate evidence more quickly to reach a decision. Also, the evidence collection is faster in fixed neighbour settings than in shuffled ones. Consequently, fast decisions do not distinguish cooperators from defectors in these experiments, but appear to separate those that are more reactive to the behaviour of others from those that act categorically.},
note = {DOI: 10.1038/s41598-022-24849-4},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
People have different preferences for what they allocate for themselves and what they allocate to others in social dilemmas. These differences result from contextual reasons, intrinsic values, and social expectations. What is still an area of debate is whether these differences can be estimated from differences in each individual’s deliberation process. In this work, we analyse the participants’ reaction times in three different experiments of the Iterated Prisoner’s Dilemma with the Drift Diffusion Model, which links response times to the perceived difficulty of the decision task, the rate of accumulation of information (deliberation), and the intuitive attitudes towards the choices. The correlation between these results and the attitude of the participants towards the allocation of resources is then determined. We observe that individuals who allocated resources equally are correlated with more deliberation than highly cooperative or highly defective participants, who accumulate evidence more quickly to reach a decision. Also, the evidence collection is faster in fixed neighbour settings than in shuffled ones. Consequently, fast decisions do not distinguish cooperators from defectors in these experiments, but appear to separate those that are more reactive to the behaviour of others from those that act categorically. |
Montero-Porras, Eladio; Grujić, Jelena; Domingos, Elias Fernandez; Lenaerts, Tom Inferring strategies from observations in long iterated Prisoner’s dilemma experiments Journal Article In: Scientific Reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-11654-2). @article{info:hdl:2013/344327b,
title = {Inferring strategies from observations in long iterated Prisoner’s dilemma experiments},
author = {Eladio Montero-Porras and Jelena Grujić and Elias Fernandez Domingos and Tom Lenaerts},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/344327/1/doi_327971.pdf},
year = {2022},
date = {2022-01-01},
journal = {Scientific Reports},
volume = {12},
number = {1},
abstract = {While many theoretical studies have revealed the strategies that could lead to and maintain cooperation in the Iterated Prisoner’s dilemma, less is known about what human participants actually do in this game and how strategies change when being confronted with anonymous partners in each round. Previous attempts used short experiments, made different assumptions of possible strategies, and led to very different conclusions. We present here two long treatments that differ in the partner matching strategy used, i.e. fixed or shuffled partners. Here we use unsupervised methods to cluster the players based on their actions and then Hidden Markov Model to infer what the memory-one strategies are in each cluster. Analysis of the inferred strategies reveals that fixed partner interaction leads to behavioral self-organization. Shuffled partners generate subgroups of memory-one strategies that remain entangled, apparently blocking the self-selection process that leads to fully cooperating participants in the fixed partner treatment. Analyzing the latter in more detail shows that AllC, AllD, TFT- and WSLS-like behavior can be observed. This study also reveals that long treatments are needed as experiments with less than 25 rounds capture mostly the learning phase participants go through in these kinds of experiments.},
note = {DOI: 10.1038/s41598-022-11654-2},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
While many theoretical studies have revealed the strategies that could lead to and maintain cooperation in the Iterated Prisoner’s dilemma, less is known about what human participants actually do in this game and how strategies change when being confronted with anonymous partners in each round. Previous attempts used short experiments, made different assumptions of possible strategies, and led to very different conclusions. We present here two long treatments that differ in the partner matching strategy used, i.e. fixed or shuffled partners. Here we use unsupervised methods to cluster the players based on their actions and then Hidden Markov Model to infer what the memory-one strategies are in each cluster. Analysis of the inferred strategies reveals that fixed partner interaction leads to behavioral self-organization. Shuffled partners generate subgroups of memory-one strategies that remain entangled, apparently blocking the self-selection process that leads to fully cooperating participants in the fixed partner treatment. Analyzing the latter in more detail shows that AllC, AllD, TFT- and WSLS-like behavior can be observed. This study also reveals that long treatments are needed as experiments with less than 25 rounds capture mostly the learning phase participants go through in these kinds of experiments. |
Piron, Anthony; Colli, Maikel Luis; Defrance, Matthieu; Eizirik, Decio L.; Mercader, Josep Maria; Cnop, Miriam Identification of novel type 1 and type 2 diabetes genes by colocalisation of human islet eQTL and GWAS variants Miscellaneous 2022, (Conference: EASD Annual Meeting of the European Association for the Study of Diabetes (58th: 19--23 September 2022: Stockholm, Sweden)). @misc{info:hdl:2013/353214,
title = {Identification of novel type 1 and type 2 diabetes genes by colocalisation of human islet {eQTL} and {GWAS} variants},
author = {Piron, Anthony and Colli, Maikel Luis and Defrance, Matthieu and Eizirik, Decio L. and Mercader, Josep Maria and Cnop, Miriam},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353214/1/doi_336858.pdf},
year = {2022},
date = {2022-01-01},
note = {Conference: EASD Annual Meeting of the European Association for the Study of Diabetes (58th: 19--23 September 2022: Stockholm, Sweden)},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Piron, Anthony; Szymczak, Florian; Alvelos, Maria De Oliveira; Defrance, Matthieu; Lenaerts, Tom; Eizirik, Decio L.; Cnop, Miriam RedRibbon: A new rank-rank hypergeometric overlap pipeline to compare gene and transcript expression signatures Journal Article In: bioRxiv, 2022, (DOI: 10.1101/2022.08.31.505818). @article{info:hdl:2013/353212,
title = {{RedRibbon}: A new rank-rank hypergeometric overlap pipeline to compare gene and transcript expression signatures},
author = {Piron, Anthony and Szymczak, Florian and Alvelos, Maria De Oliveira and Defrance, Matthieu and Lenaerts, Tom and Eizirik, Decio L. and Cnop, Miriam},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353212/3/2022.08.31.505818v1.full.pdf},
year = {2022},
date = {2022-01-01},
journal = {bioRxiv},
abstract = {Motivation. High throughput omics technologies have generated a wealth of large protein, gene and transcript datasets that have exacerbated the need for new methods to analyse and compare big datasets. Rank-rank hypergeometric overlap is an important threshold-free method to combine and visualize two ranked lists of P-values or fold-changes, usually from differential gene expression analyses. Here, we introduce a new rank-rank hypergeometric overlap-based method aimed at both gene level and alternative splicing analyses at transcript or exon level, hitherto unreachable as transcript numbers are an order of magnitude larger than gene numbers. Results. We tested the tool on synthetic and real datasets at gene and transcript levels to detect correlation and anti-correlation patterns and found it to be fast and accurate, even on very large datasets thanks to an evolutionary algorithm based minimal P-value search. The tool comes with a ready-to-use permutation scheme allowing the computation of adjusted P-values at low time cost. Additionally, the package is a drop-in replacement to previous packages as a compatibility mode is included, allowing to re-run older studies with close to no change to existing pipelines. RedRibbon holds the promise to accurately extricate detailed information from large analyses. Availability. RNA-sequencing datasets are available through the Gene Expression Omnibus (GEO) portal with accession numbers GSE159984, GSE133218, GSE137136, GSE98485, GSE148058 and GSE108413. The C libraries and R package code are open to the community with a permissive licence (GPL3) and available for download from GitHub https://github.com/antpiron/ale, https://github.com/antpiron/cRedRibbon and https://github.com/antpiron/RedRibbon.},
doi = {10.1101/2022.08.31.505818},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Motivation. High throughput omics technologies have generated a wealth of large protein, gene and transcript datasets that have exacerbated the need for new methods to analyse and compare big datasets. Rank-rank hypergeometric overlap is an important threshold-free method to combine and visualize two ranked lists of P-values or fold-changes, usually from differential gene expression analyses. Here, we introduce a new rank-rank hypergeometric overlap-based method aimed at both gene level and alternative splicing analyses at transcript or exon level, hitherto unreachable as transcript numbers are an order of magnitude larger than gene numbers. Results. We tested the tool on synthetic and real datasets at gene and transcript levels to detect correlation and anti-correlation patterns and found it to be fast and accurate, even on very large datasets thanks to an evolutionary algorithm based minimal P-value search. The tool comes with a ready-to-use permutation scheme allowing the computation of adjusted P-values at low time cost. Additionally, the package is a drop-in replacement to previous packages as a compatibility mode is included, allowing to re-run older studies with close to no change to existing pipelines. RedRibbon holds the promise to accurately extricate detailed information from large analyses. Availability. RNA-sequencing datasets are available through the Gene Expression Omnibus (GEO) portal with accession numbers GSE159984, GSE133218, GSE137136, GSE98485, GSE148058 and GSE108413. The C libraries and R package code are open to the community with a permissive licence (GPL3) and available for download from GitHub https://github.com/antpiron/ale, https://github.com/antpiron/cRedRibbon and https://github.com/antpiron/RedRibbon. |
Ciortan, Madalina; Defrance, Matthieu GNN-based embedding for clustering scRNA-seq data Journal Article In: Bioinformatics, vol. 38, no. 4, pp. 1037-1044, 2022, (DOI: 10.1093/bioinformatics/btab787). @article{info:hdl:2013/343811b,
title = {{GNN}-based embedding for clustering {scRNA-seq} data},
author = {Ciortan, Madalina and Defrance, Matthieu},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/343811/3/btab787.pdf},
year = {2022},
date = {2022-01-01},
journal = {Bioinformatics},
volume = {38},
number = {4},
pages = {1037--1044},
abstract = {Motivation: Single-cell RNA sequencing (scRNA-seq) provides transcriptomic profiling for individual cells, allowing researchers to study the heterogeneity of tissues, recognize rare cell identities and discover new cellular subtypes. Clustering analysis is usually used to predict cell class assignments and infer cell identities. However, the high sparsity of scRNA-seq data, accentuated by dropout events generates challenges that have motivated the development of numerous dedicated clustering methods. Nevertheless, there is still no consensus on the best performing method. Results: graph-sc is a new method leveraging a graph autoencoder network to create embeddings for scRNA-seq cell data. While this work analyzes the performance of clustering the embeddings with various clustering algorithms, other downstream tasks can also be performed. A broad experimental study has been performed on both simulated and scRNA-seq datasets. The results indicate that although there is no consistently best method across all the analyzed datasets, graph-sc compares favorably to competing techniques across all types of datasets. Furthermore, the proposed method is stable across consecutive runs, robust to input down-sampling, generally insensitive to changes in the network architecture or training parameters and more computationally efficient than other competing methods based on neural networks. Modeling the data as a graph provides increased flexibility to define custom features characterizing the genes, the cells and their interactions. Moreover, external data (e.g. gene network) can easily be integrated into the graph and used seamlessly under the same optimization task. Availability and implementation: https://github.com/ciortanmadalina/graph-sc. Supplementary information: Supplementary data are available at Bioinformatics online.},
doi = {10.1093/bioinformatics/btab787},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Motivation: Single-cell RNA sequencing (scRNA-seq) provides transcriptomic profiling for individual cells, allowing researchers to study the heterogeneity of tissues, recognize rare cell identities and discover new cellular subtypes. Clustering analysis is usually used to predict cell class assignments and infer cell identities. However, the high sparsity of scRNA-seq data, accentuated by dropout events generates challenges that have motivated the development of numerous dedicated clustering methods. Nevertheless, there is still no consensus on the best performing method. Results: graph-sc is a new method leveraging a graph autoencoder network to create embeddings for scRNA-seq cell data. While this work analyzes the performance of clustering the embeddings with various clustering algorithms, other downstream tasks can also be performed. A broad experimental study has been performed on both simulated and scRNA-seq datasets. The results indicate that although there is no consistently best method across all the analyzed datasets, graph-sc compares favorably to competing techniques across all types of datasets. Furthermore, the proposed method is stable across consecutive runs, robust to input down-sampling, generally insensitive to changes in the network architecture or training parameters and more computationally efficient than other competing methods based on neural networks. Modeling the data as a graph provides increased flexibility to define custom features characterizing the genes, the cells and their interactions. Moreover, external data (e.g. gene network) can easily be integrated into the graph and used seamlessly under the same optimization task. Availability and implementation: https://github.com/ciortanmadalina/graph-sc. Supplementary information: Supplementary data are available at Bioinformatics online. |
Rivière, Quentin; Corso, Massimiliano; Ciortan, Madalina; Noël, Grégoire; Verbruggen, Nathalie; Defrance, Matthieu Exploiting Genomic Features to Improve the Prediction of Transcription Factor-Binding Sites in Plants Journal Article In: Plant and Cell Physiology, vol. 63, no. 10, pp. 1457-1473, 2022, (DOI: 10.1093/pcp/pcac095). @article{info:hdl:2013/352290,
title = {Exploiting Genomic Features to Improve the Prediction of Transcription Factor-Binding Sites in Plants},
author = {Rivière, Quentin and Corso, Massimiliano and Ciortan, Madalina and Noël, Grégoire and Verbruggen, Nathalie and Defrance, Matthieu},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/352290/3/Riviere_et_al.pdf},
year = {2022},
date = {2022-01-01},
journal = {Plant and Cell Physiology},
volume = {63},
number = {10},
pages = {1457--1473},
abstract = {The identification of transcription factor (TF) target genes is central in biology. A popular approach is based on the location by pattern matching of potential cis-regulatory elements (CREs). During the last few years, tools integrating next-generation sequencing data have been developed to improve the performance of pattern matching. However, such tools have not yet been comprehensively evaluated in plants. Hence, we developed a new streamlined method aiming at predicting CREs and target genes of plant TFs in specific organs or conditions. Our approach implements a supervised machine learning strategy, which allows decision rule models to be learnt using TF ChIP-chip/seq experimental data. Different layers of genomic features were integrated in predictive models: the position on the gene, the DNA sequence conservation, the chromatin state and various CRE footprints. Among the tested features, the chromatin features were crucial for improving the accuracy of the method. Furthermore, we evaluated the transferability of predictive models across TFs, organs and species. Finally, we validated our method by correctly inferring the target genes of key TFs controlling metabolite biosynthesis at the organ level in Arabidopsis. We developed a tool—Wimtrap—to reproduce our approach in plant species and conditions/organs for which ChIP-chip/seq data are available. Wimtrap is a user-friendly R package that supports an R Shiny web interface and is provided with pre-built models that can be used to quickly get predictions of CREs and TF gene targets in different organs or conditions in Arabidopsis thaliana, Solanum lycopersicum, Oryza sativa and Zea mays.},
doi = {10.1093/pcp/pcac095},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
The identification of transcription factor (TF) target genes is central in biology. A popular approach is based on the location by pattern matching of potential cis-regulatory elements (CREs). During the last few years, tools integrating next-generation sequencing data have been developed to improve the performance of pattern matching. However, such tools have not yet been comprehensively evaluated in plants. Hence, we developed a new streamlined method aiming at predicting CREs and target genes of plant TFs in specific organs or conditions. Our approach implements a supervised machine learning strategy, which allows decision rule models to be learnt using TF ChIP-chip/seq experimental data. Different layers of genomic features were integrated in predictive models: the position on the gene, the DNA sequence conservation, the chromatin state and various CRE footprints. Among the tested features, the chromatin features were crucial for improving the accuracy of the method. Furthermore, we evaluated the transferability of predictive models across TFs, organs and species. Finally, we validated our method by correctly inferring the target genes of key TFs controlling metabolite biosynthesis at the organ level in Arabidopsis. We developed a tool—Wimtrap—to reproduce our approach in plant species and conditions/organs for which ChIP-chip/seq data are available. Wimtrap is a user-friendly R package that supports an R Shiny web interface and is provided with pre-built models that can be used to quickly get predictions of CREs and TF gene targets in different organs or conditions in Arabidopsis thaliana, Solanum lycopersicum, Oryza sativa and Zea mays. |
Bizet, Martin; Defrance, Matthieu; Calonne, Emilie; Bontempi, Gianluca; Sotiriou, Christos; Fuks, François; Jeschke, Jana Improving Infinium MethylationEPIC data processing: re-annotation of enhancers and long noncoding RNA genes and benchmarking of normalization methods Journal Article In: Epigenetics, vol. 17, no. 13, pp. 2434-2454, 2022, (DOI: 10.1080/15592294.2022.2135201). @article{info:hdl:2013/353467b,
title = {Improving {Infinium} {MethylationEPIC} data processing: re-annotation of enhancers and long noncoding {RNA} genes and benchmarking of normalization methods},
author = {Bizet, Martin and Defrance, Matthieu and Calonne, Emilie and Bontempi, Gianluca and Sotiriou, Christos and Fuks, François and Jeschke, Jana},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353467/5/KEPI_17_2135201.pdf},
year = {2022},
date = {2022-01-01},
journal = {Epigenetics},
volume = {17},
number = {13},
pages = {2434--2454},
abstract = {Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease.},
doi = {10.1080/15592294.2022.2135201},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease. |
Grolaux, Robin; Hardy, Alexis; Olsen, Catharina; Van Dooren, Sonia; Smits, Guillaume; Defrance, Matthieu Identification of differentially methylated regions in rare diseases from a single-patient perspective Journal Article In: Clinical Epigenetics, vol. 14, no. 1, 2022, (DOI: 10.1186/s13148-022-01403-7). @article{info:hdl:2013/353081,
title = {Identification of differentially methylated regions in rare diseases from a single-patient perspective},
author = {Grolaux, Robin and Hardy, Alexis and Olsen, Catharina and Van Dooren, Sonia and Smits, Guillaume and Defrance, Matthieu},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353081/1/doi_336725.pdf},
year = {2022},
date = {2022-01-01},
journal = {Clinical Epigenetics},
volume = {14},
number = {1},
abstract = {Background: DNA methylation (5-mC) is being widely recognized as an alternative in the detection of sequence variants in the diagnosis of some rare neurodevelopmental and imprinting disorders. Identification of alterations in DNA methylation plays an important role in the diagnosis and understanding of the etiology of those disorders. Canonical pipelines for the detection of differentially methylated regions (DMRs) usually rely on inter-group (e.g., case versus control) comparisons. However, these tools might perform suboptimally in the context of rare diseases and multilocus imprinting disturbances due to small cohort sizes and inter-patient heterogeneity. Therefore, there is a need to provide a simple but statistically robust pipeline for scientists and clinicians to perform differential methylation analyses at the single patient level as well as to evaluate how parameter fine-tuning may affect differentially methylated region detection. Result: We implemented an improved statistical method to detect differentially methylated regions in correlated datasets based on the Z-score and empirical Brown aggregation methods from a single-patient perspective. To accurately assess the predictive power of our method, we generated semi-simulated data using a public control population of 521 samples and investigated how the size of the control population, methylation difference, and region size affect DMR detection. In addition, we validated the detection of methylation events in patients suffering from rare multi-locus imprinting disturbance and evaluated how this method could complement existing tools in the context of clinical diagnosis. Conclusion: In this study, we present a robust statistical method to perform differential methylation analysis at the single patient level and describe its optimal parameters to increase DMRs identification performance. Finally, we show its diagnostic utility when applied to rare disorders.},
doi = {10.1186/s13148-022-01403-7},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Background: DNA methylation (5-mC) is being widely recognized as an alternative in the detection of sequence variants in the diagnosis of some rare neurodevelopmental and imprinting disorders. Identification of alterations in DNA methylation plays an important role in the diagnosis and understanding of the etiology of those disorders. Canonical pipelines for the detection of differentially methylated regions (DMRs) usually rely on inter-group (e.g., case versus control) comparisons. However, these tools might perform suboptimally in the context of rare diseases and multilocus imprinting disturbances due to small cohort sizes and inter-patient heterogeneity. Therefore, there is a need to provide a simple but statistically robust pipeline for scientists and clinicians to perform differential methylation analyses at the single patient level as well as to evaluate how parameter fine-tuning may affect differentially methylated region detection. Result: We implemented an improved statistical method to detect differentially methylated regions in correlated datasets based on the Z-score and empirical Brown aggregation methods from a single-patient perspective. To accurately assess the predictive power of our method, we generated semi-simulated data using a public control population of 521 samples and investigated how the size of the control population, methylation difference, and region size affect DMR detection. In addition, we validated the detection of methylation events in patients suffering from rare multi-locus imprinting disturbance and evaluated how this method could complement existing tools in the context of clinical diagnosis. Conclusion: In this study, we present a robust statistical method to perform differential methylation analysis at the single patient level and describe its optimal parameters to increase DMRs identification performance. Finally, we show its diagnostic utility when applied to rare disorders. |
Marquis, Bastien; Jansen, Maarten Information criteria bias correction for group selection Journal Article In: Statistical Papers, vol. 63, no. 5, pp. 1387-1414, 2022, (Language of publication: fr). @article{info:hdl:2013/335472b,
title = {Information criteria bias correction for group selection},
author = {Marquis, Bastien and Jansen, Maarten},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/335472/3/marquis22groupmirror.pdf},
year = {2022},
date = {2022-01-01},
journal = {Statistical Papers},
volume = {63},
number = {5},
pages = {1387--1414},
note = {Language of publication: fr},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Paldino, Gian Marco; De Caro, Fabrizio; De Stefani, Jacopo; Vaccaro, Alfredo A.; Villacci, Domenico D.; Bontempi, Gianluca A Digital Twin Approach for Improving Estimation Accuracy in Dynamic Thermal Rating of Transmission Lines Journal Article In: Energies, vol. 15, no. 6, 2022, (DOI: 10.3390/en15062254). @article{info:hdl:2013/342471b,
title = {A Digital Twin Approach for Improving Estimation Accuracy in Dynamic Thermal Rating of Transmission Lines},
author = {Paldino, Gian Marco and De Caro, Fabrizio and De Stefani, Jacopo and Vaccaro, Alfredo A. and Villacci, Domenico D. and Bontempi, Gianluca},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/342471/1/doi_326115.pdf},
year = {2022},
date = {2022-01-01},
journal = {Energies},
volume = {15},
number = {6},
abstract = {The limitation of transmission lines thermal capacity plays a crucial role in the safety and reliability of power systems. Dynamic thermal line rating approaches aim to estimate the transmission line’s temperature and assess its compliance with the limitations above. Existing physics-based standards estimate the temperature based on environment and line conditions measured by several sensors. This manuscript shows that estimation accuracy can be improved by adopting a data-driven Digital Twin approach. The proposed method exploits machine learning by learning the input–output relation between the physical sensors data and the actual conductor temperature, serving as a digital equivalent to physics-based standards. An experimental assessment on real data, comparing the proposed approach with the IEEE 738 standard, shows a reduction of 60% of the Root Mean Squared Error and a decrease in the maximum estimation error from above 10 °C to below 7 °C. These preliminary results suggest that the Digital Twin provides more accurate and robust estimations, serving as a complement, or a potential alternative, to traditional methods.},
doi = {10.3390/en15062254},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
The limitation of transmission lines thermal capacity plays a crucial role in the safety and reliability of power systems. Dynamic thermal line rating approaches aim to estimate the transmission line’s temperature and assess its compliance with the limitations above. Existing physics-based standards estimate the temperature based on environment and line conditions measured by several sensors. This manuscript shows that estimation accuracy can be improved by adopting a data-driven Digital Twin approach. The proposed method exploits machine learning by learning the input–output relation between the physical sensors data and the actual conductor temperature, serving as a digital equivalent to physics-based standards. An experimental assessment on real data, comparing the proposed approach with the IEEE 738 standard, shows a reduction of 60% of the Root Mean Squared Error and a decrease in the maximum estimation error from above 10 °C to below 7 °C. These preliminary results suggest that the Digital Twin provides more accurate and robust estimations, serving as a complement, or a potential alternative, to traditional methods. |
Bizet, Martin; Defrance, Matthieu; Calonne, Emilie; Bontempi, Gianluca; Sotiriou, Christos; Fuks, François; Jeschke, Jana Improving Infinium MethylationEPIC data processing: re-annotation of enhancers and long noncoding RNA genes and benchmarking of normalization methods Journal Article In: Epigenetics, vol. 17, no. 13, pp. 2434-2454, 2022, (DOI: 10.1080/15592294.2022.2135201). @article{info:hdl:2013/353467,
title = {Improving {Infinium} {MethylationEPIC} data processing: re-annotation of enhancers and long noncoding {RNA} genes and benchmarking of normalization methods},
author = {Bizet, Martin and Defrance, Matthieu and Calonne, Emilie and Bontempi, Gianluca and Sotiriou, Christos and Fuks, François and Jeschke, Jana},
url = {https://dipot.ulb.ac.be/dspace/bitstream/2013/353467/5/KEPI_17_2135201.pdf},
year = {2022},
date = {2022-01-01},
journal = {Epigenetics},
volume = {17},
number = {13},
pages = {2434--2454},
abstract = {Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease.},
doi = {10.1080/15592294.2022.2135201},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Illumina Infinium DNA Methylation (5mC) arrays are a popular technology for low-cost, high-throughput, genome-scale measurement of 5mC distribution, especially in cancer and other complex diseases. After the success of its HumanMethylation450 array (450k), Illumina released the MethylationEPIC array (850k) featuring increased coverage of enhancers. Despite the widespread use of 850k, analysis of the corresponding data remains suboptimal: it still relies mostly on Illumina's default annotation, which underestimates enhancers and long noncoding RNAs. Results: We have thus developed an approach, based on the ENCODE and LNCipedia databases, which greatly improves upon Illumina's default annotation of enhancers and long noncoding transcripts. We compared the re-annotated 850k with both 450k and reduced-representation bisulphite sequencing (RRBS), another high-throughput 5mC profiling technology. We found 850k to cover at least three times as many enhancers and long noncoding RNAs as either 450k or RRBS. We further investigated the reproducibility of the three technologies, applying various normalization methods to the 850k data. Most of these methods reduced variability to a level below that of RRBS data. We then used 850k with our new annotation and normalization to profile 5mC changes in breast cancer biopsies. 850k highlighted aberrant enhancer methylation as the predominant feature, in agreement with previous reports. Our study provides an updated processing approach for 850k data, based on refined probe annotation and normalization, allowing for improved analysis of methylation at enhancers and long noncoding RNA genes. Our findings will help to further advance understanding of the DNA methylome in health and disease. |
Ciortan, Madalina; Defrance, Matthieu GNN-based embedding for clustering scRNA-seq data Journal Article In: Bioinformatics, vol. 38, no. 4, pp. 1037-1044, 2022, (DOI: 10.1093/bioinformatics/btab787). @article{info:hdl:2013/343811,
  title      = {{GNN}-based embedding for clustering {scRNA-seq} data},
  author     = {Ciortan, Madalina and Defrance, Matthieu},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/343811},
  year       = {2022},
  date       = {2022-01-01},
  journal    = {Bioinformatics},
  volume     = {38},
  number     = {4},
  pages      = {1037--1044},
  doi        = {10.1093/bioinformatics/btab787},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
|
Montero-Porras, Eladio; Grujić, Jelena; Fernández Domingos, Elias; Lenaerts, Tom Inferring strategies from observations in long iterated Prisoner’s dilemma experiments Journal Article In: Scientific reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-11654-2). @article{info:hdl:2013/344327,
  title      = {Inferring strategies from observations in long iterated {Prisoner’s} dilemma experiments},
  author     = {Montero-Porras, Eladio and Grujić, Jelena and Fernández Domingos, Elias and Lenaerts, Tom},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/344327},
  year       = {2022},
  date       = {2022-01-01},
  journal    = {Scientific reports},
  volume     = {12},
  number     = {1},
  doi        = {10.1038/s41598-022-11654-2},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
|
Cimpeanu, Theodor; Santos, Francisco C.; Pereira, Luís Marcelo; Lenaerts, Tom; Han, The Anh Artificial intelligence development races in heterogeneous settings Journal Article In: Scientific reports, vol. 12, no. 1, 2022, (DOI: 10.1038/s41598-022-05729-3). @article{info:hdl:2013/341515,
  title      = {Artificial intelligence development races in heterogeneous settings},
  author     = {Cimpeanu, Theodor and Santos, Francisco C. and Pereira, Luís Marcelo and Lenaerts, Tom and Han, The Anh},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/341515},
  year       = {2022},
  date       = {2022-01-01},
  journal    = {Scientific reports},
  volume     = {12},
  number     = {1},
  doi        = {10.1038/s41598-022-05729-3},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
|
Marquis, Bastien; Jansen, Maarten Information criteria bias correction for group selection Journal Article In: Statistical papers, 2022, (Language of publication: fr). @article{info:hdl:2013/335472,
  author     = {Bastien Marquis and Maarten Jansen},
  title      = {Information criteria bias correction for group selection},
  journal    = {Statistical papers},
  year       = {2022},
  date       = {2022-01-01},
  url        = {http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/335472},
  note       = {Language of publication: fr},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article},
}
|