Skip to contents

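Normalizes read counts by the TPM (Transcripts Per Million) method:

  1. Each gene's count is divided by its gene length, giving a length-normalized rate.

  2. Within each sample (column), the rates are rescaled so that they sum to one million.

  3. If log_trans = TRUE, applies log2(TPM + 1).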

Usage

tpm_normalization(
  x,
  gene_length = NULL,
  log_trans = FALSE,
  assay_name = NULL,
  new_assay_name = NULL
)

Arguments

x

A numeric matrix or data.frame of gene counts, or a SummarizedExperiment containing such counts.

If a SummarizedExperiment,

the function retrieves gene_length from rowData(x)$gene_length.

If a data.frame/matrix,

the user must provide the gene_length argument.

gene_length

A numeric vector of gene lengths (one per row), used only if x is a data.frame or matrix. Must match the number of rows in x. Ignored if x is a SummarizedExperiment.

log_trans

Logical. If TRUE, apply log2(... + 1) transform to the TPM-normalized values.

assay_name

If x is a SummarizedExperiment, name of the assay to normalize. Defaults to the first assay if not specified.

new_assay_name

If x is a SummarizedExperiment, name of a new assay in which to store the TPM (or log2-TPM). If NULL, overwrites the assay specified in assay_name.

Value

A numeric matrix of TPM or log2(TPM + 1) values if x is a data.frame or matrix. If x is a SummarizedExperiment, returns the modified SummarizedExperiment with the TPM data placed in the existing or new assay.

Details

If x is a SummarizedExperiment, this function looks for a numeric column named "gene_length" in rowData(x). That column must have length equal to the number of rows in the assay being normalized.

Examples

# Load the container class and the example data set used throughout
library(SummarizedExperiment)
library(airway)
data("airway")

se <- airway

# Keep a reproducible random sample of at most 1000 rows (genes)
set.seed(123)
n_keep <- min(1000, nrow(se))
idx <- sample(seq_len(nrow(se)), size = n_keep)
se <- se[idx, ]

### Add a gene_length column to rowData, computed as
### gene_seq_end - gene_seq_start.
###
### The `-` must end the first line: R treats a syntactically complete line
### as a finished expression, so a continuation line that *starts* with `-`
### is evaluated on its own as a unary minus (and auto-printed), leaving
### gene_length equal to gene_seq_end.
rowData(se)$gene_length <- rowData(se)$gene_seq_end -
  rowData(se)$gene_seq_start

# NOTE(review): the example outputs printed further down this page were
# rendered while gene_length still held gene_seq_end; re-render the examples
# to refresh them.

# -------------------------------
# 1) Using a matrix (or data.frame)
# -------------------------------

gene_length <- rowData(se)$gene_length

# Keep the raw counts in their own object so that every call below starts
# from counts, never from values that were already TPM-normalized.
counts <- assay(se)

## Without log transformation
tpm <- tpm_normalization(counts, gene_length = gene_length)

tpm[1:5, 1:5]
#>                  SRR1039508  SRR1039509 SRR1039512 SRR1039513 SRR1039516
#> ENSG00000260166    0.000000    0.000000      0.000      0.000      0.000
#> ENSG00000266931    0.000000    0.000000      0.000      0.000      0.000
#> ENSG00000104774 7988.133038 9155.891497   7580.641   7027.455   6525.079
#> ENSG00000267583    0.000000    1.857346      0.000      0.000      0.000
#> ENSG00000227581    0.589507    0.000000      0.000      0.000      0.000

## With log transformation (applied to the raw counts, not to `tpm`)
log_tpm <- tpm_normalization(
  counts,
  gene_length = gene_length,
  log_trans = TRUE
)

log_tpm[1:5, 1:5]
# NOTE(review): the output previously shown here came from normalizing the
# TPM matrix a second time; re-render this example to print the corrected
# log2(TPM + 1) values.

# -------------------------------
# 2) Using a SummarizedExperiment
# -------------------------------

# When new_assay_name is not supplied, the normalized values replace the
# assay that was normalized
se2 <- tpm_normalization(se, log_trans = FALSE)

head(assay(se2))
#>                  SRR1039508  SRR1039509 SRR1039512 SRR1039513 SRR1039516
#> ENSG00000260166    0.000000    0.000000      0.000      0.000      0.000
#> ENSG00000266931    0.000000    0.000000      0.000      0.000      0.000
#> ENSG00000104774 7988.133038 9155.891497   7580.641   7027.455   6525.079
#> ENSG00000267583    0.000000    1.857346      0.000      0.000      0.000
#> ENSG00000227581    0.589507    0.000000      0.000      0.000      0.000
#> ENSG00000227317    0.000000    0.000000      0.000      0.000      0.000
#>                   SRR1039517   SRR1039520 SRR1039521
#> ENSG00000260166    0.0000000    0.6380541      0.000
#> ENSG00000266931    0.0000000    0.0000000      0.000
#> ENSG00000104774 6742.8769431 7986.7194711   8515.681
#> ENSG00000267583    0.0000000    0.0000000      0.000
#> ENSG00000227581    0.8534855    0.0000000      0.000
#> ENSG00000227317    0.0000000    0.0000000      0.000

# When new_assay_name is supplied, the normalized values are stored in a new
# assay with that name
se2 <- tpm_normalization(se, new_assay_name = "tpm_counts", log_trans = FALSE)

head(assay(se2, "tpm_counts"))
#>                  SRR1039508  SRR1039509 SRR1039512 SRR1039513 SRR1039516
#> ENSG00000260166    0.000000    0.000000      0.000      0.000      0.000
#> ENSG00000266931    0.000000    0.000000      0.000      0.000      0.000
#> ENSG00000104774 7988.133038 9155.891497   7580.641   7027.455   6525.079
#> ENSG00000267583    0.000000    1.857346      0.000      0.000      0.000
#> ENSG00000227581    0.589507    0.000000      0.000      0.000      0.000
#> ENSG00000227317    0.000000    0.000000      0.000      0.000      0.000
#>                   SRR1039517   SRR1039520 SRR1039521
#> ENSG00000260166    0.0000000    0.6380541      0.000
#> ENSG00000266931    0.0000000    0.0000000      0.000
#> ENSG00000104774 6742.8769431 7986.7194711   8515.681
#> ENSG00000267583    0.0000000    0.0000000      0.000
#> ENSG00000227581    0.8534855    0.0000000      0.000
#> ENSG00000227317    0.0000000    0.0000000      0.000

# The assay to normalize can also be chosen explicitly.
# Build a matrix of random counts with the same shape and dimnames as `se`
n_genes <- nrow(se)
n_samples <- ncol(se)
new_matrix <- matrix(
  data = sample(
    x = seq_len(100000),
    size = n_genes * n_samples,
    replace = TRUE
  ),
  nrow = n_genes,
  ncol = n_samples,
  dimnames = list(rownames(se), colnames(se))
)

## Store the random counts as an additional assay named new_counts
assay(se, "new_counts") <- new_matrix

se2 <- tpm_normalization(
  se,
  assay_name = "new_counts",
  new_assay_name = "tpm_counts_new"
)

se2
#> class: RangedSummarizedExperiment 
#> dim: 1000 8 
#> metadata(1): ''
#> assays(3): counts new_counts tpm_counts_new
#> rownames(1000): ENSG00000260166 ENSG00000266931 ... ENSG00000160886
#>   ENSG00000142871
#> rowData names(11): gene_id gene_name ... symbol gene_length
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(9): SampleName cell ... Sample BioSample

head(assay(se2, "tpm_counts_new"))
#>                 SRR1039508 SRR1039509 SRR1039512 SRR1039513  SRR1039516
#> ENSG00000260166  214.47154 312.896401   237.0760   197.1483   99.113795
#> ENSG00000266931  212.74808 259.174457   231.0856   305.1729    9.696242
#> ENSG00000104774   17.86898 903.248951   212.9786  1316.4357 1958.447321
#> ENSG00000267583  181.17143 570.739601   386.1762   695.3827  197.296851
#> ENSG00000227581  232.13179  72.579986   269.7707   134.5134  298.122123
#> ENSG00000227317  640.04367   1.868172   745.9103   897.4473  353.031404
#>                 SRR1039517 SRR1039520 SRR1039521
#> ENSG00000260166  369.23341   65.85615   28.98194
#> ENSG00000266931   34.82144  255.41825   20.19892
#> ENSG00000104774  820.90084   81.28148  101.14455
#> ENSG00000267583  673.21176  550.05897   33.15431
#> ENSG00000227581  360.70760  145.05720  231.34687
#> ENSG00000227317 1034.72572  809.57394  246.51015