##这个可以跑
if(T){
  gset <- getGEO("GSE42872",destdir = ".",AnnotGPL = F,getGPL = F)
  save(gset,file ="GSE42872_eSet.Rdata")
}
##也就是说,当文件f不存在时,if(!file.exists(f))等同于if(T):此时file.exists(f)返回FALSE,前面加上!表示否定,即为TRUE,于是执行后面{}中的内容;如果文件f已经存在,!file.exists(f)为FALSE,{}中的内容就会被跳过。
##这个跑了没动静。。。哈哈,其实并不是语法错误,而是条件为F(FALSE),{}中的内容被直接跳过,什么都不执行 if(F){ gset <- getGEO("GSE42872",destdir = ".",AnnotGPL = F,getGPL = F) save(gset,file ="GSE42872_eSet.Rdata") }
> dir() [1] "GSE42872_eSet.Rdata" "GSE42872_series_matrix.txt.gz" > f = "a.txt" > file.exists(f) [1] FALSE > dir() [1] "a.txt" "GSE42872_eSet.Rdata" [3] "GSE42872_series_matrix.txt.gz" > file.exists(f) [1] TRUE
##用getGEO()按GSE号从GEO数据库下载,得到的是一个list,其第一个元素才是ExpressionSet对象,元素名就是该GSE号对应的压缩文件名
> gset <- getGEO("GSE42872",destdir = ".",AnnotGPL = F,getGPL = F) ##注释文件和平台文件都不要 Found 1 file(s) GSE42872_series_matrix.txt.gz 试开URL’https://ftp.ncbi.nlm.nih.gov/geo/series/GSE42nnn/GSE42872/matrix/GSE42872_series_matrix.txt.gz‘ Content type ‘application/x-gzip‘ length 768865 bytes (750 KB) downloaded 750 KB Parsed with column specification: cols( ID_REF = col_double(), GSM1052615 = col_double(), GSM1052616 = col_double(), GSM1052617 = col_double(), GSM1052618 = col_double(), GSM1052619 = col_double(), GSM1052620 = col_double() ) > class(gset) [1] "list" > length(gset) [1] 1 > class(gset[[1]]) [1] "ExpressionSet" attr(,"package") [1] "Biobase" > gset $GSE42872_series_matrix.txt.gz ExpressionSet (storageMode: lockedEnvironment) assayData: 33297 features, 6 samples element names: exprs protocolData: none phenoData sampleNames: GSM1052615 GSM1052616 ... GSM1052620 (6 total) varLabels: title geo_accession ... cell type:ch1 (34 total) varMetadata: labelDescription featureData: none experimentData: use ‘experimentData(object)‘ pubMedIds: 24469106 Annotation: GPL6244
##如果已经下好了GSE号对应的压缩文件(这是作者上传的处理后的表达矩阵series matrix,并不是最原始的raw data),getGEO()可以直接读取它,得到的直接就是上述的gset[[1]],也就是那个ExpressionSet对象。注意该参数的全名是filename,下面写的file = 是靠R的参数部分匹配才生效的。
> a = getGEO(file = "GSE42872_series_matrix.txt.gz",AnnotGPL = F,getGPL = F) Parsed with column specification: cols( ID_REF = col_double(), GSM1052615 = col_double(), GSM1052616 = col_double(), GSM1052617 = col_double(), GSM1052618 = col_double(), GSM1052619 = col_double(), GSM1052620 = col_double() ) |=====================================================================================| 100% 1 MB ##1M?好像就是解压缩了一样。。。 > class(a) [1] "ExpressionSet" attr(,"package") [1] "Biobase" > length(a) [1] 1 > a ExpressionSet (storageMode: lockedEnvironment) assayData: 33297 features, 6 samples element names: exprs protocolData: none phenoData sampleNames: GSM1052615 GSM1052616 ... GSM1052620 (6 total) varLabels: title geo_accession ... cell type:ch1 (34 total) varMetadata: labelDescription featureData: none experimentData: use ‘experimentData(object)‘ pubMedIds: 24469106 Annotation: GPL6244
> b = getGEO(file = "GSE42872_series_matrix.txt.gz") ##如果不加参数AnnotGPL及getGPL,会额外下载GPL6244.soft这个文件。它是平台GPL6244的SOFT格式文件,里面是探针的注释信息,也就是平台注释文件 Parsed with column specification: cols( ID_REF = col_double(), GSM1052615 = col_double(), GSM1052616 = col_double(), GSM1052617 = col_double(), GSM1052618 = col_double(), GSM1052619 = col_double(), GSM1052620 = col_double() ) File stored at: C:\Users\HWC\AppData\Local\Temp\RtmpGqMiOr/GPL6244.soft |=====================================================================================| 100% 96 MB
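##下不下GPL6244.soft这个文件是由getGPL这个参数决定的,默认为TRUE,即下载;AnnotGPL默认为FALSE,它只决定是否改用带注释的GPL版本(annotation GPL)。下面的c和d可以验证:只设getGPL = F时不会下载,只设AnnotGPL = F时仍然会读取GPL6244.soft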
> c = getGEO(file = "GSE42872_series_matrix.txt.gz",getGPL = F) Parsed with column specification: cols( ID_REF = col_double(), GSM1052615 = col_double(), GSM1052616 = col_double(), GSM1052617 = col_double(), GSM1052618 = col_double(), GSM1052619 = col_double(), GSM1052620 = col_double() ) > class(c) [1] "ExpressionSet" attr(,"package") [1] "Biobase" > d = getGEO(file = "GSE42872_series_matrix.txt.gz",AnnotGPL = F) Parsed with column specification: cols( ID_REF = col_double(), GSM1052615 = col_double(), GSM1052616 = col_double(), GSM1052617 = col_double(), GSM1052618 = col_double(), GSM1052619 = col_double(), GSM1052620 = col_double() ) Using locally cached version of GPL6244 found here: C:\Users\HWC\AppData\Local\Temp\RtmpGqMiOr/GPL6244.soft |=====================================================================================| 100% 96 MB
> ls() [1] "a" "b" "c" "d" "gset" > rm(c(b,c)) Error in rm(c(b, c)) : ...要么含名字,要么是字符串 ##rm()的...里只能直接写对象名或带引号的字符串,不能写c(b,c)这样的表达式;即使在c()里加上引号也不行,想用字符向量要写成rm(list = c("b","c"))。。。 > rm(b,c) ##还以为要用c(),其实删除多个对象,直接用逗号隔开列出即可 > ls() [1] "a" "d" "gset" > history(20) ##history()查看历史命令
跟着jmzeng学习GEO数据分析-GEO42872_1--题外
原文:https://www.cnblogs.com/SWTwanzhu/p/13127313.html