% Journal article in Applied Sciences, vol. 10, no. 16 (Aug 2020).
% Literal ampersands in the note field are written as \& so the field
% typesets under LaTeX; all names use the "Last, First" form.
% NOTE(review): no pages/article-number field is present; the DOI suffix
% suggests article number 5663 -- confirm before adding.
@article{a947e66690bc40499227f038b7c754f3,
  author    = {Ali, {Syed Imran} and Ali, Bilal and Hussain, Jamil and Hussain, Musarrat and Satti, {Fahad Ahmed} and Park, {Gwang Hoon} and Lee, Sungyoung},
  title     = {Cost-sensitive ensemble feature ranking and automatic threshold selection for chronic kidney disease diagnosis},
  journal   = {Applied Sciences (Switzerland)},
  volume    = {10},
  number    = {16},
  year      = {2020},
  month     = aug,
  publisher = {Multidisciplinary Digital Publishing Institute},
  issn      = {2076-3417},
  doi       = {10.3390/app10165663},
  language  = {English},
  keywords  = {Chronic kidney disease, Cost-sensitive feature selection, Decision tree classifiers, Ensemble models, Gradient boosted trees, Random forest},
  abstract  = {Automated medical diagnosis is one of the important machine learning applications in the domain of healthcare. In this regard, most of the approaches primarily focus on optimizing the accuracy of classification models. In this research, we argue that, unlike general-purpose classification problems, medical applications, such as chronic kidney disease (CKD) diagnosis, require special treatment. In the case of CKD, apart from model performance, other factors such as the cost of data acquisition may also be taken into account to enhance the applicability of the automated diagnosis system. In this research, we proposed two techniques for cost-sensitive feature ranking. An ensemble of decision tree models is employed in both the techniques for computing the worth of a feature in the CKD dataset. An automatic threshold selection heuristic is also introduced which is based on the intersection of features' worth and their accumulated cost. A set of experiments are conducted to evaluate the efficacy of the proposed techniques on both tree-based and non tree-based classification models. The proposed approaches were also evaluated against several comparative techniques. Furthermore, it is demonstrated that the proposed techniques select around 1/4th of the original CKD features while reducing the cost by a factor of 7.42 of the original feature set. Based on the extensive experimentation, it is concluded that the proposed techniques employing feature-cost interaction heuristic tend to select feature subsets that are both useful and cost-effective.},
  note      = {Funding Information: Funding: This research was supported by the MSIT (Ministry of Science and ICT), Korea, under the ITRC (Information Technology Research Center) support program (IITP-2017-0-01629) supervised by the IITP (Institute for Information \& communications Technology Promotion), this work was supported by the Institute for Information \& communications Technology Promotion (IITP) grant funded by the Korea government (MSIT) (No.2017-0-00655), this research was supported by the MSIT(Ministry of Science and ICT), Korea, under the Grand Information Technology Research Center support program(IITP-2020-0-01489) supervised by the IITP(Institute for Information \& communications Technology Planning \& Evaluation), NRF-2016K1A3A7A03951968 (EU), and NRF-2019R1A2C2090504. Funding Information: This research was supported by the MSIT (Ministry of Science and ICT), Korea, under the ITRC (Information Technology Research Center) support program (IITP-2017-0-01629) supervised by the IITP (Institute for Information \& communications Technology Promotion), this work was supported by the Institute for Information \& communications Technology Promotion (IITP) grant funded by the Korea government (MSIT) (No.2017-0-00655), this research was supported by the MSIT(Ministry of Science and ICT), Korea, under the Grand Information Technology Research Center support program(IITP-2020-0-01489) supervised by the IITP(Institute for Information \& communications Technology Planning \& Evaluation), NRF-2016K1A3A7A03951968 (EU), and NRF-2019R1A2C2090504. Publisher Copyright: {\textcopyright} 2020 by the authors.},
}