Output¶
-
class
pygna.output.
Output
(network_filename: str, output_table_results_file: str, analysis: str, geneset_file: str, setnames: list, geneset_file_B: str = None, setnames_B: list = None)[source]¶ This class is used to print different data on files
-
add_GMT_entry
(key: str, descriptor: str, gene_list: str) → None[source]¶ Add a gmt entry in the GMT file
Parameters: - key – the key name to store
- descriptor – the descriptor of the gene list
- gene_list – the gene list to write
Example
>>> import pygna.reading_class as rc >>> geneset = rc.ReadGmt("geneset_file.csv").get_geneset("brca") >>> setnames = [key for key in geneset.keys()] >>> network = rc.ReadTsv("network_file.tsv").get_network() >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> for setname, item in geneset.items(): ... item = set(item) ... module = nx.subgraph(network, item) ... lcc = sorted(list(nx.connected_components(module)), key=len, reverse=True)[0] ... out.add_GMT_entry("brca", "topology_module", lcc)
-
close_temporary_table
() → None[source]¶ Remove the temporary file
Example
>>> setnames = ["A", "B", "C"] >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> out.create_st_table_empirical() >>> out.close_temporary_table()
-
create_GMT_output
(output_gmt: str) → None[source]¶ Write the GMT line on the GMT file
Parameters: output_gmt – the GMT to print Example
>>> import pygna.reading_class as rc >>> geneset = rc.ReadGmt("geneset_file.csv").get_geneset("brca") >>> setnames = [key for key in geneset.keys()] >>> network = rc.ReadTsv("network_file.tsv").get_network() >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> for setname, item in geneset.items(): ... item = set(item) ... module = nx.subgraph(network, item) ... lcc = sorted(list(nx.connected_components(module)), key=len, reverse=True)[0] ... out.add_GMT_entry("brca", "topology_module", lcc) >>> out.create_GMT_output("output_lcc.gmt")
-
create_comparison_table_empirical
() → None[source]¶ Write the headings for the comparison table
Example
>>> setnames = ["A", "B", "C"] >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> out.create_comparison_table_empirical()
-
create_st_table_empirical
() → None[source]¶ Create the headings of the table that is going to be written in the csv file
Example
>>> setnames = ["A", "B", "C"] >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> out.create_st_table_empirical()
-
set_diffusion_matrix
(diffusion_matrix_file: str) → None[source]¶ Set the diffusion matrix file
Parameters: diffusion_matrix_file – the diffusion matrix file to use Example
>>> setnames = ["A", "B", "C"] >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> out.set_diffusion_matrix("diffusion_matrix.csv")
-
update_comparison_table_empirical
(setname_A: str, setname_B: str, n_geneset_A: int, n_mapped_A: int, n_geneset_B: int, n_mapped_B: int, n_overlaps: int, number_of_permutations: int, observed: int, empirical_pvalue: float, mean_null: numpy.mean, var_null: numpy.var) → None[source]¶ Update the content of the comparison table, adding a new row on the file
Parameters: - setname_A – the name of the geneset A
- setname_B – the name of the geneset B
- n_geneset_A – the number of genes in the geneset A
- n_mapped_A – the number of mapped genes in geneset A
- n_geneset_B – the number of genes in the geneset B
- n_mapped_B – the number of mapped genes in geneset B
- n_overlaps – the number of overlaps
- number_of_permutations – number of performed permutations
- observed – number of observed genes
- empirical_pvalue – value of the empirical pvalue
- mean_null – mean of the null distribution
- var_null – variance of the null distribution
Example
>>> import itertools >>> import pygna.command as cm >>> import pygna.reading_class as rc >>> import pygna.statistical_comparison as sc >>> geneset_a = rc.ReadGmt("genset_file").get_geneset("brca") >>> setnames = [key for key in geneset_a.keys()] >>> network = rc.ReadTsv("network_file.tsv").get_network() >>> distance_matrix_filename = "distance_matrix.tsv" >>> in_memory = True >>> network = nx.Graph(network.subgraph(max(nx.connected_components(network), key=len))) >>> sp_diz = {"nodes": cm.read_distance_matrix(distance_matrix_filename, in_memory=in_memory)[0], ... "matrix": cm.read_distance_matrix(distance_matrix_filename, in_memory=in_memory)[1]} >>> st_comparison = sc.StatisticalComparison(sc.comparison_shortest_path, network, diz=sp_diz, n_proc=2) >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> for pair in itertools.combinations(setnames, 2): ... observed, pvalue, null_d, a_mapped, b_mapped = st_comparison.comparison_empirical_pvalue(set(geneset_a[pair[0]]), set(geneset_a[pair[1]]), max_iter=number_of_permutations) ... out.update_comparison_table_empirical(pair[0], pair[1], len(set(geneset_a[pair[0]])), a_mapped, len(set(geneset_a[pair[1]])), b_mapped, n_overlaps, number_of_permutations, observed, pvalue, np.mean(null_d), np.var(null_d))
-
update_st_table_empirical
(setname: str, n_mapped: int, n_geneset: int, number_of_permutations: int, observed: int, empirical_pvalue: float, mean_null: numpy.mean, var_null: numpy.var) → None[source]¶ Update the table content, adding a new line to the file
Parameters: - setname – the name of the geneset
- n_mapped – the number of mapped genes
- n_geneset – the number of genes in the geneset
- number_of_permutations – the number of permutations
- observed – value of observed genes
- empirical_pvalue – value of the empirical p-value
- mean_null – mean of the null distribution
- var_null – var of the null distribution
Example
>>> setnames = ["A", "B", "C"] >>> out = Output("networkfile.tsv", "results.csv", "myanalysis", "genset_a.csv", setnames) >>> out.create_st_table_empirical() >>> out.update_st_table_empirical(setname, n_mapped, n_geneset, number_of_permutations, observed, empirical_pvalue=0.001, mean_null=np.mean(0.11), var_null=np.var(0.2))
-
-
pygna.output.
print_GMT
(gmt_dictionary: dict, output_file: str) → None[source]¶ Save the dictionary on a GMT file
Parameters: - gmt_dictionary – the dictionary containing the data
- output_file – the file to save the data
Example
>>> gmt_dict = {"key": "dict_sets"} >>> print_GMT(gmt_dict, "mygmt.gmt")
-
pygna.output.
apply_multiple_testing_correction
(table_file: str, pval_col: str = 'empirical_pvalue', method: str = 'fdr_bh', threshold: float = 0.1) → None[source]¶ Apply the multiple testing correction and save the result in a csv file
Parameters: - table_file – the name of the file to read
- pval_col – the name of the column containing the empirical pvalue
- method – the correction method to use
- threshold – the threshold to use in the method
Example
>>> table_filename = "pygna_comparison_results.csv" >>> apply_multiple_testing_correction(table_filename, pval_col="empirical_pvalue", method="fdr_bh", threshold=0.1)
-
pygna.output.
write_graph_summary
(graph: networkx.classes.graph.Graph, output_file: str, net_name: str = None) → None[source]¶ This function takes a graph as input and writes the network properties in a text file
Parameters: - graph – the graph to print
- output_file – the name of the file to print
- net_name – the name of the network
Example
>>> import pygna.reading_class as rc >>> text_output = "My summary stats" >>> network = rc.ReadTsv("mynetwork.tsv").get_network() >>> write_graph_summary(network, text_output, "mynetwork.tsv")