Output

class pygna.output.Output(network_filename: str, output_table_results_file: str, analysis: str, geneset_file: str, setnames: list, geneset_file_B: str = None, setnames_B: list = None)[source]

This class is used to write different kinds of data to files

add_GMT_entry(key: str, descriptor: str, gene_list: str) → None[source]

Add a gmt entry in the GMT file

Parameters:
  • key – the key name to store
  • descriptor – the descriptor of the gene list
  • gene_list – the gene list to write

Example

>>> import pygna.reading_class as rc
>>> geneset = rc.ReadGmt("geneset_file.csv").get_geneset("brca")
>>> setnames = [key for key in geneset.keys()]
>>> network = rc.ReadTsv("network_file.tsv").get_network()
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> for setname, item in geneset.items():
...     item = set(item)
...     module = nx.subgraph(network, item)
...     lcc = sorted(list(nx.connected_components(module)), key=len, reverse=True)[0]
...     out.add_GMT_entry("brca", "topology_module", lcc)
close_temporary_table() → None[source]

Remove the temporary file

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_st_table_empirical()
>>> out.close_temporary_table()
create_GMT_output(output_gmt: str) → None[source]

Write the GMT line on the GMT file

Parameters: output_gmt – the GMT file to write

Example

>>> import pygna.reading_class as rc
>>> geneset = rc.ReadGmt("geneset_file.csv").get_geneset("brca")
>>> setnames = [key for key in geneset.keys()]
>>> network = rc.ReadTsv("network_file.tsv").get_network()
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> for setname, item in geneset.items():
...     item = set(item)
...     module = nx.subgraph(network, item)
...     lcc = sorted(list(nx.connected_components(module)), key=len, reverse=True)[0]
...     out.add_GMT_entry("brca", "topology_module", lcc)
>>> out.create_GMT_output("output_lcc.gmt")
create_comparison_table_empirical() → None[source]

Write the headings for the comparison table

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_comparison_table_empirical()
create_st_table_empirical() → None[source]

Create the headings of the table that is going to be written in the csv file

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_st_table_empirical()
set_diffusion_matrix(diffusion_matrix_file: str) → None[source]

Set the diffusion matrix file

Parameters: diffusion_matrix_file – the diffusion matrix file to use

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.set_diffusion_matrix("diffusion_matrix.csv")
update_comparison_table_empirical(setname_A: str, setname_B: str, n_geneset_A: int, n_mapped_A: int, n_geneset_B: int, n_mapped_B: int, n_overlaps: int, number_of_permutations: int, observed: int, empirical_pvalue: float, mean_null: numpy.mean, var_null: numpy.var) → None[source]

Update the content of the comparison table, adding a new row on the file

Parameters:
  • setname_A – the name of the geneset A
  • setname_B – the name of the geneset B
  • n_geneset_A – the number of genes in the geneset A
  • n_mapped_A – the number of mapped genes in geneset A
  • n_geneset_B – the number of genes in the geneset B
  • n_mapped_B – the number of mapped genes in geneset B
  • n_overlaps – the number of overlaps
  • number_of_permutations – number of performed permutations
  • observed – number of observed genes
  • empirical_pvalue – value of the empirical pvalue
  • mean_null – mean of the null distribution
  • var_null – variance of the null distribution

Example

>>> import itertools
>>> import pygna.command as cm
>>> import pygna.reading_class as rc
>>> import pygna.statistical_comparison as sc
>>> geneset_a = rc.ReadGmt("genset_file").get_geneset("brca")
>>> setnames = [key for key in geneset_a.keys()]
>>> network = rc.ReadTsv("network_file.tsv").get_network()
>>> distance_matrix_filename = "distance_matrix.tsv"
>>> in_memory = True
>>> network = nx.Graph(network.subgraph(max(nx.connected_components(network), key=len)))
>>> sp_diz = {"nodes": cm.read_distance_matrix(distance_matrix_filename, in_memory=in_memory)[0],
...           "matrix": cm.read_distance_matrix(distance_matrix_filename, in_memory=in_memory)[1]}
>>> st_comparison = sc.StatisticalComparison(sc.comparison_shortest_path, network, diz=sp_diz, n_proc=2)
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> for pair in itertools.combinations(setnames, 2):
...     observed, pvalue, null_d, a_mapped, b_mapped = st_comparison.comparison_empirical_pvalue(set(geneset_a[pair[0]]), set(geneset_a[pair[1]]), max_iter=number_of_permutations)
...     out.update_comparison_table_empirical(pair[0], pair[1], len(set(geneset_a[pair[0]])), a_mapped, len(set(geneset_a[pair[1]])), b_mapped, n_overlaps, number_of_permutations, observed, pvalue, np.mean(null_d), np.var(null_d))
update_st_table_empirical(setname: str, n_mapped: int, n_geneset: int, number_of_permutations: int, observed: int, empirical_pvalue: float, mean_null: numpy.mean, var_null: numpy.var) → None[source]

Update the table content, adding a new line to the file

Parameters:
  • setname – the name of the geneset
  • n_mapped – the number of mapped genes
  • n_geneset – the number of genes in the geneset
  • number_of_permutations – the number of permutations
  • observed – value of observed genes
  • empirical_pvalue – value of the empirical p-value
  • mean_null – mean of the null distribution
  • var_null – variance of the null distribution

Example

>>> setnames = ["A", "B", "C"]
>>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames)
>>> out.create_st_table_empirical()
>>> out.update_st_table_empirical(setname, n_mapped, n_geneset, number_of_permutations, observed, empirical_pvalue=0.001, mean_null=np.mean(0.11), var_null=np.var(0.2))
pygna.output.print_GMT(gmt_dictionary: dict, output_file: str) → None[source]

Save the dictionary to a GMT file

Parameters:
  • gmt_dictionary – the dictionary containing the data
  • output_file – the file to save the data

Example

>>> gmt_dict = {"key": "dict_sets"}
>>> print_GMT(gmt_dict, "mygmt.gmt")
pygna.output.apply_multiple_testing_correction(table_file: str, pval_col: str = 'empirical_pvalue', method: str = 'fdr_bh', threshold: float = 0.1) → None[source]

Apply the multiple testing correction and save the results in a csv file

Parameters:
  • table_file – the name of the file to read
  • pval_col – the name of the column containing the empirical p-value
  • method – the correction method to use
  • threshold – the threshold to use in the method

Example

>>> table_filename = "pygna_comparison_results.csv"
>>> apply_multiple_testing_correction(table_filename, pval_col="empirical_pvalue", method="fdr_bh", threshold=0.1)
pygna.output.write_graph_summary(graph: networkx.classes.graph.Graph, output_file: str, net_name: str = None) → None[source]

This function takes a graph as input and writes the network properties in a text file

Parameters:
  • graph – the graph to print
  • output_file – the name of the file to print
  • net_name – the name of the network

Example

>>> import pygna.reading_class as rc
>>> text_output = "My summary stats"
>>> network = rc.ReadTsv("mynetwork.tsv").get_network()
>>> write_graph_summary(network, text_output, "mynetwork.tsv")