% Bhalla et al. (2017), Scientific Reports vol. 7: gene-expression biomarkers
% for early vs. late stage ccRCC. Authors are stored in "Last, First" form with
% spaced initials so that every initial survives name abbreviation by the style.
@article{open2031,
  author    = {Bhalla, Sherry and Chaudhary, Kumardeep and Kumar, Ritesh and Sehgal, Manika and Kaur, Harpreet and Sharma, Suresh and Raghava, G. P. S.},
  title     = {Gene expression-based biomarkers for discriminating early and late stage of clear cell renal cancer},
  journal   = {Scientific Reports},
  volume    = {7},
  year      = {2017},
  publisher = {Nature Publishing Group},
  note      = {Open Access},
  url       = {http://crdd.osdd.net/open/2031/},
  abstract  = {In this study, an attempt has been made to identify expression-based gene biomarkers that can discriminate early and late stage of clear cell renal cell carcinoma (ccRCC) patients. We have analyzed the gene expression of 523 samples to identify genes that are differentially expressed in the early and late stage of ccRCC. First, a threshold-based method has been developed, which attained a maximum accuracy of 71.12\% with ROC 0.67 using single gene NR3C2. To improve the performance of threshold-based method, we combined two or more genes and achieved maximum accuracy of 70.19\% with ROC of 0.74 using eight genes on the validation dataset. These eight genes include four underexpressed (NR3C2, ENAM, DNASE1L3, FRMPD2) and four overexpressed (PLEKHA9, MAP6D1, SMPD4, C11orf73) genes in the late stage of ccRCC. Second, models were developed using state-of-art techniques and achieved maximum accuracy of 72.64\% and 0.81 ROC using 64 genes on validation dataset. Similar accuracy was obtained on 38 genes selected from subset of genes, involved in cancer hallmark biological processes. Our analysis further implied a need to develop gender-specific models for stage classification. A web server, CancerCSP, has been developed to predict stage of ccRCC using gene expression data derived from RNAseq experiments.},
}