lenti216 [~/perltest]>more file5.pl
#!/opt/local/bin/perl -w
# file.pl
#
# Read a sequence file in which every record starts with a ">" header line,
# join the lines of each sequence into a single line, and print every header
# and joined sequence both on the screen and (appended) to good.pep.
#
# usage: perl file5.pl [filename]
#   filename  file to read; if it is not given on the command line the
#             script asks for it.
# exit status: 0 on success, 1 if a file cannot be opened or written.

use strict;
use warnings;

# print the given text on the screen and also to the filehandle $fh
sub print_both {
    my ($fh, @text) = @_;
    print @text;
    print {$fh} @text;
    return;
}

my $filename = shift;
unless (defined($filename)) {
    print "What sequence to analyze? ";
    $filename = <STDIN>;
    # <STDIN> returns undef when input ends before a name is typed
    unless (defined($filename)) {
        print STDERR "\nNo file name given\n\n";
        exit 1;
    }
    chomp($filename);
}

# open file for reading from and associate it with a "filehandle"
# if open fails (returns "false"), inform user and end
# (three-argument open: the file name can never be mistaken for a mode)
my $protein_fh;
unless (open($protein_fh, '<', $filename)) {
    print STDERR "Cannot open file: $filename ($!)\n\n";
    exit 1;
}

my $outputfile = "good.pep";
my $out_fh;
unless (open($out_fh, '>>', $outputfile)) {    # open file for appending to
    print STDERR "Cannot open file: $outputfile ($!)\n\n";
    exit 1;
}

# initialize (set to an initial value) scalar string variable $seq
my $seq = "";

# true once a header has been read, so that no empty sequence line is
# printed in front of the first header
my $have_record = 0;

# while there are unread lines in the input file
# read one line and, if the first character is a ">", print it
#   regular expressions (regexps) are enclosed in / /s.
#   ^ in a regexp means first character
#   ^> means first character is a ">"
#   s/ / / means replace (substitute) one regular expression for another
#   \s means white space (tab, blank space, new line, etc.)
#   s/\s// means replace a white space character by nothing (delete it)
#   s/\s//g means do the substitution as many times as possible (globally)
#   without the g, the substitution is only done once each string
#   =~ is the binding operator; it applies the regexp operation on its
#   right to the variable on its left
while (my $line = <$protein_fh>) {
    if ($line =~ /^>/) {
        # a new header ends the previous record: print its sequence
        if ($have_record || length($seq)) {
            print_both($out_fh, $seq, "\n");
        }
        $seq = "";
        $have_record = 1;

        # drop the line ending and add "\n" back, so the header is
        # terminated even if the file uses CRLF or lacks a final newline
        $line =~ s/[\r\n]+$//;
        print_both($out_fh, $line, "\n");
    }
    else {
        $line =~ s/\s//g;      # also removes the newline at the end
        $seq .= $line;         # . is the string concatenation operator
    }
}

# process last sequence
if ($have_record || length($seq)) {
    print_both($out_fh, $seq, "\n");
}

close($protein_fh);

# close output file; buffered write errors only show up here
unless (close($out_fh)) {
    print STDERR "Cannot write file: $outputfile ($!)\n\n";
    exit 1;
}

exit;
lenti217 [~/perltest]>perl file5.pl tests.pep
>A28086 Length: 104
VKQIESKSAFQEVLDSAGDKLVVVDFSATWCGPCKMIKPFFHALSEKFNNVVFIEVDVDDCKDIAAECEVKCMPTFQFFKKGQKVGEFSGANKEKLEATINELL
>gi|79882|pir||B29504 hypothetical 18K protein (mer operon) - Staphylococcus aureus plasmid pI258
MKKRISFTAIMTVLLIGLTACGAESDTANESKVQDIQGNPVSLPNEKPTLIYFMATWCPSCIYNEEIFKEMHQLNPNDVQLITVSLDPNTDTKESLAKFKQDYGGDWPHVLKNGKEIADTYGVKQLEEIVLVNSENEVFYRSVRPSFDDLKEALTQIGVEL
>ORF15 S. aureus pathogenicity island
MEDVTNEEVFEMIDSRTGVLNANDWKSQLRRSATTQALKKTTTNAEIILCNDESLKGLVQYDAFEKVTKLKRLPYWRSKGDTNYYWADIDTTHVISHIDKLYNVQFSRDLIDTVIEKEAYQNRFHPIKSMIESKSWDGIKRIETLFIDYLGAEDNHYNREVTKKWMMGAVARIYQPGIKYDSMIILYGGQGVGKSTAVSKLGGHWYNQSIKTFKGDEVYKKLQGSWICEIEELSAFQKSTIEDIKGFISAIVDIYRASYGKRTERHPRQCVFVGTTNNYEFLKDQTGNRRFFPITTDKNKATKSPFDDLTPDVVQQMFAEAKVYFDEDPTDKALLLDKEASEMALKVQEAHSEKDALVGEIEEFLERPIPSDYWYRTLEEKRVSAHDVIDQDYIKLYGDGKLIELPNTKPGAYVWRDKVCSMEIWKVMMKRDDQPQQHHLRKIDKALRNTNYCDTVKKQTRYGEGIGKQYGFSVDLASYYKNLKV
lenti218 [~/perltest]>more good.pep
>A28086 Length: 104
VKQIESKSAFQEVLDSAGDKLVVVDFSATWCGPCKMIKPFFHALSEKFNNVVFIEVDVDDCKDIAAECEVKCMPTFQFFK
KGQKVGEFSGANKEKLEATINELL
>gi|79882|pir||B29504 hypothetical 18K protein (mer operon) - Staphylococcus aur
eus plasmid pI258
MKKRISFTAIMTVLLIGLTACGAESDTANESKVQDIQGNPVSLPNEKPTLIYFMATWCPSCIYNEEIFKEMHQLNPNDVQ
LITVSLDPNTDTKESLAKFKQDYGGDWPHVLKNGKEIADTYGVKQLEEIVLVNSENEVFYRSVRPSFDDLKEALTQIGVE
L
>ORF15 S.
aureus pathogenicity island
MEDVTNEEVFEMIDSRTGVLNANDWKSQLRRSATTQALKKTTTNAEIILCNDESLKGLVQYDAFEKVTKLKRLPYWRSKG
DTNYYWADIDTTHVISHIDKLYNVQFSRDLIDTVIEKEAYQNRFHPIKSMIESKSWDGIKRIETLFIDYLGAEDNHYNRE
VTKKWMMGAVARIYQPGIKYDSMIILYGGQGVGKSTAVSKLGGHWYNQSIKTFKGDEVYKKLQGSWICEIEELSAFQKST
IEDIKGFISAIVDIYRASYGKRTERHPRQCVFVGTTNNYEFLKDQTGNRRFFPITTDKNKATKSPFDDLTPDVVQQMFAE
AKVYFDEDPTDKALLLDKEASEMALKVQEAHSEKDALVGEIEEFLERPIPSDYWYRTLEEKRVSAHDVIDQDYIKLYGDG
KLIELPNTKPGAYVWRDKVCSMEIWKVMMKRDDQPQQHHLRKIDKALRNTNYCDTVKKQTRYGEGIGKQYGFSVDLASYY
KNLKV
lenti219 [~/perltest]>