Вроде стало похоже на правду
This commit is contained in:
parent
d091cc4335
commit
113176257f
|
@ -5,11 +5,12 @@
|
|||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: main8.py",
|
||||
"name": "Python: ищи-русские-имена-в-коде.py",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "main8.py",
|
||||
"program": "ищи-русские-имена-в-коде.py",
|
||||
"console": "integratedTerminal",
|
||||
"cwd": "${workspaceFolder}/иривк",
|
||||
"justMyCode": true
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1,3 +1,70 @@
|
|||
https://github.com/budden/dlist
|
||||
https://github.com/DonCuponesInternet/rails_admin
|
||||
https://github.com/aravindgd/rails_admin
|
||||
https://github.com/athompson11/Soteria-v3
|
||||
https://github.com/InsaneHyena/tgstation
|
||||
https://github.com/Boggart/-tg-station
|
||||
https://github.com/HippieStation/HippieStation13
|
||||
https://github.com/ExcessiveUseOfCobblestone/tgstation
|
||||
https://github.com/Judopay/Judo-Ruby
|
||||
https://github.com/prgTW/monolog
|
||||
https://github.com/bevis-ui/bevis-and-bt-speech
|
||||
https://github.com/gdamjan/vezilka
|
||||
https://github.com/LopatkinEvgeniy/robot-parser
|
||||
https://github.com/kimshrier/elixir
|
||||
https://github.com/Suomaa/FTT
|
||||
https://github.com/dle-modules/DLE-Charset-Converter
|
||||
https://github.com/Felix0830/gitextensions
|
||||
https://github.com/thexide/JavaScript
|
||||
https://github.com/istrel/basisjs
|
||||
https://github.com/p2rv/Univer
|
||||
https://github.com/ruLait/wp-steam-shortcode
|
||||
https://github.com/Alexponomarev7/plotter
|
||||
https://github.com/dosvid/landing
|
||||
https://github.com/vlascoder/otrs
|
||||
https://github.com/krf/kdevplatform
|
||||
https://github.com/Nukkit/Nukkit
|
||||
https://github.com/vsuh/1S_unloads
|
||||
https://github.com/alshalan/Mobile-OpenVPN
|
||||
https://github.com/LionZXY/HackathonBMSTU
|
||||
https://github.com/splitice/Elastica
|
||||
https://github.com/lolosoft/CashBook
|
||||
https://github.com/fredformout/InstagramKit
|
||||
https://github.com/nin-jin/pms-jin
|
||||
https://github.com/mcepl/youtube-dl
|
||||
https://github.com/FreeZbe/ACE3
|
||||
https://github.com/nikolauska/ACE3
|
||||
https://github.com/ddiachkov/chrno_audit
|
||||
https://github.com/pershoot/vision-2635
|
||||
https://github.com/byakatat/selenium-training
|
||||
https://github.com/Flexberry/ember-flexberry-designer
|
||||
https://github.com/otavioarc/freeCodeCamp
|
||||
https://github.com/anketolog/AnketologClient-php
|
||||
https://github.com/AKosterin/akosterin.github.io
|
||||
https://github.com/fandrej/glonassd
|
||||
https://github.com/Scorpibear/chegura
|
||||
https://github.com/mentatDemon/TOPP_TC
|
||||
https://github.com/joncol/jcon
|
||||
https://github.com/kerneldevs/caf-kernel
|
||||
https://github.com/nasser-embedded/linux
|
||||
https://github.com/fmaker/kernel_msm
|
||||
https://github.com/galaxys-cm7miui-kernel/ICS-kernel-SGS
|
||||
https://github.com/coolya/android_kernel_samsung_msm
|
||||
https://github.com/pacificIT/linux-2.6.36
|
||||
https://github.com/DerTeufel/cm7
|
||||
https://github.com/SergOmarov/Hight-level-library-for-Lua
|
||||
https://github.com/EKOsh/TeleMonBot
|
||||
https://github.com/expdevelop/d812
|
||||
https://github.com/ms301/TelegraphAPI
|
||||
https://github.com/johnner/tran
|
||||
https://github.com/esclkm/pagemasseditor
|
||||
https://github.com/EvercodeLab/EvercodeHipchatMonologBundle
|
||||
https://github.com/JeffPyeBrook/WP-e-Commerce
|
||||
https://github.com/ticketmaster-api/ticketmaster-api.github.io
|
||||
https://github.com/dreikanter/boodka
|
||||
https://github.com/daveloyall/urbit
|
||||
https://github.com/LK4D4/criu
|
||||
https://github.com/rbabichev/Astrafit
|
||||
https://github.com/mishakos/InsuranceSystem.Library
|
||||
https://github.com/vapkarian/soccer-analyzer
|
||||
https://github.com/TrayEdge/FloatingActionButton
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import requests
|
||||
from subprocess import call
|
||||
import threading
|
||||
import pygments
|
||||
from pygments.token import Text
|
||||
from pygments import lex
|
||||
from pygments.lexers import get_lexer_for_filename
|
||||
import pygments.token
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
### Проверен на python 3.7.5
|
||||
|
@ -18,19 +19,23 @@ from concurrent.futures import ThreadPoolExecutor
|
|||
|
||||
возможныеПутиК_README_mdВнутриРепозитория = ["/blob/main/README.md", "/blob/master/README.md", "/README.md", "/", ""]
|
||||
найденныеЯзыкиКоторыеМыНеЗаказывали = []
|
||||
интересныеЯзыки = ['Ruby', 'VB.net', 'GLSL', 'Perl', 'PHP', 'Python', 'Common Lisp', 'OCaml', 'Java', 'C#', 'JavaScript', 'C', 'C++', 'Prolog', 'Go', 'Rust', 'Scheme', 'Transact-SQL', 'PL-SQL', 'tsql', 'PL/1', 'plsql', 'pli', 'Pascal', 'Delphi', 'Modula-2']
|
||||
интересныеЯзыки = ['Ruby', 'VB.net', 'GLSL', 'Perl', 'PHP', 'Python', 'Common Lisp', 'OCaml', 'Java',
|
||||
'C#', 'JavaScript', 'C', 'C++', 'Prolog', 'Go', 'Rust', 'Scheme', 'Transact-SQL', 'PL-SQL', 'tsql', 'PL/1', 'plsql', 'pli', 'Pascal', 'Delphi', 'Modula-2']
|
||||
|
||||
неинтересныеРасширенияФайлов = ['.md','.txt','.html','.xml','.XML','.json']
|
||||
|
||||
|
||||
def НайденЯзыкКоторыйМыНеЗаказывали(lexer_name, url, log, файлДляНезаказанныхЯзыков):
    """Report a lexer language that is not in the list of requested languages.

    The first time ``lexer_name`` is seen it is appended to the module-level
    list ``найденныеЯзыкиКоторыеМыНеЗаказывали``, a message is written to
    ``log`` and printed, and the name is written on its own line to
    ``файлДляНезаказанныхЯзыков``. Calls with an already recorded name do
    nothing.

    Args:
        lexer_name: Language name as reported by the lexer.
        url: Repository URL; used as the prefix of the message.
        log: Writable text stream that receives the message.
        файлДляНезаказанныхЯзыков: Writable text stream that receives one
            language name per line.
    """
    if lexer_name in найденныеЯзыкиКоторыеМыНеЗаказывали:
        return

    найденныеЯзыкиКоторыеМыНеЗаказывали.append(lexer_name)

    # The message is built from the `lexer_name` parameter; no `lexer` object
    # exists in this function's scope.
    message = f"{url} - Лексер определил язык, который не включён в список разрешённых. {lexer_name} "
    log.write(f"{message}\n")
    print(message)

    # NOTE(review): the diff view this was recovered from has no indentation;
    # the write is kept under the first-seen guard so the file gets one line
    # per language - confirm against the repository.
    файлДляНезаказанныхЯзыков.write(f"{lexer_name}\n")
|
||||
|
||||
def download_repo(url, log):
|
||||
httpsPrefix = "https://github.com/"
|
||||
assert(url.startswith(httpsPrefix))
|
||||
repo_dir = "cloned_repos/" + url.split('/')[3:]
|
||||
repo_dir = os.path.join("cloned_repos",*url.split('/')[3:])
|
||||
gitUrl = url.replace(httpsPrefix, "git@github.com:")
|
||||
try:
|
||||
call(['git', 'clone', '--depth', '1', gitUrl, repo_dir])
|
||||
|
@ -55,12 +60,11 @@ def analyze_readme(url, log):
|
|||
else:
|
||||
return 1
|
||||
except Exception as e:
|
||||
log.write(f"{readme_url} - Не найден README. \n")
|
||||
print(f"{readme_url} - Не найден README.")
|
||||
return 0
|
||||
|
||||
|
||||
def analyze_repo(url, log):
|
||||
def analyze_repo(url, log, файлДляНезаказанныхЯзыков):
|
||||
try:
|
||||
print(f"{url} STP загрузуа и анализ README")
|
||||
res = analyze_readme(url, log)
|
||||
|
@ -76,31 +80,42 @@ def analyze_repo(url, log):
|
|||
for file in files:
|
||||
file_path = os.path.join(root, file)
|
||||
file_ext = os.path.splitext(file_path)[1]
|
||||
неинтересноеРасширение = False
|
||||
for расш in неинтересныеРасширенияФайлов:
|
||||
if file_ext.endswith(расш):
|
||||
неинтересноеРасширение = True
|
||||
break
|
||||
|
||||
if неинтересноеРасширение:
|
||||
continue
|
||||
|
||||
lexer = None
|
||||
try:
|
||||
lexer = pygments.lexers.get_lexer_for_filename(file_ext)
|
||||
except:
|
||||
log.write(f"{url}...{file_ext} - Лексер не определил язык. \n")
|
||||
print(f"{url}...{file_ext} - Лексер не определил язык. ")
|
||||
pass
|
||||
if lexer and not(lexer.name in интересныеЯзыки):
|
||||
НайденЯзыкКоторыйМыНеЗаказывали(lexer.name, url, log)
|
||||
if file_ext and lexer and (lexer.name in интересныеЯзыки) and not file_ext.endswith(".md"):
|
||||
#with open(file_path, 'r', encoding='utf-8') as f:
|
||||
if file_ext:
|
||||
try:
|
||||
lexer = get_lexer_for_filename(file_ext)
|
||||
except:
|
||||
print(f"{url}...{file_ext} - Лексер не определил язык. ")
|
||||
lexer = None
|
||||
if lexer is None:
|
||||
continue
|
||||
if not(lexer.name in интересныеЯзыки):
|
||||
НайденЯзыкКоторыйМыНеЗаказывали(lexer.name, url, log, файлДляНезаказанныхЯзыков)
|
||||
if (lexer.name in интересныеЯзыки):
|
||||
def ИщиРусскиеИменаВТакойКодировке(encoding):
    """Tell whether the current file contains a name token with Russian letters.

    Uses ``file_path``, ``lexer``, ``log`` and ``url`` from the enclosing
    scope. The file is decoded with ``encoding`` (undecodable bytes are
    dropped) and lexed with ``lexer``; only tokens whose type is
    ``pygments.token.Name`` or one of its subtypes are inspected.

    Args:
        encoding: Name of the text encoding used to decode the file.

    Returns:
        True if at least one name token contains a Russian letter, False
        otherwise. False is also returned when reading or lexing fails; the
        failure is written to ``log`` and printed.
    """
    russian_letter = '[а-яА-ЯёЁ]'
    try:
        # Read the file once and reuse the text for both the pre-check and
        # the lexer.
        with open(file_path, 'r', encoding=encoding, errors='ignore') as f:
            content = f.read()

        # Pre-check: without any Russian letter in the text no token can
        # match, so lexing is skipped.
        if not re.search(russian_letter, content):
            return False

        for token, value in pygments.lex(content, lexer):
            # Subtype check rather than identity, so subtypes of Name count.
            if pygments.token.is_token_subtype(token, pygments.token.Name):
                if re.search(russian_letter, value):
                    return True
        return False
    except Exception:
        # Best effort: a file that cannot be read or lexed counts as "no
        # match". KeyboardInterrupt and SystemExit are not caught here.
        log.write(f"{url} - Ошибка при разборе файла. \n")
        print(f"{url} - Ошибка при разборе файла.")
        return False
|
||||
|
||||
|
@ -112,6 +127,7 @@ def analyze_repo(url, log):
|
|||
print(f"{url} - Не обнаруженно файлов содержащих русские символы.")
|
||||
else:
|
||||
log.write(f"{url} - Русский язык был найден в этом репозитории: {files_with_russian} \n")
|
||||
log.flush()
|
||||
print(f"{url} - Русский язык был найден в этом репозитории: {files_with_russian}")
|
||||
return
|
||||
|
||||
|
@ -123,15 +139,13 @@ def analyze_repo(url, log):
|
|||
|
||||
def main():
    """Run the analysis for every repository URL listed in ``repos.txt``.

    Opens ``ЯзыкиКоторыеМыНеЗаказывали.txt`` and ``log.txt`` for writing and
    passes both streams, together with each URL, to ``analyze_repo``.
    """
    # Explicit UTF-8: the messages written to these files are Cyrillic, so
    # the platform default encoding is not relied on.
    with open("ЯзыкиКоторыеМыНеЗаказывали.txt", "w", encoding="utf-8") as файлДляНезаказанныхЯзыков:
        # Read the repository links from the file; blank lines are skipped.
        with open("repos.txt", "r", encoding="utf-8") as file:
            urls = [line.strip() for line in file if line.strip()]

        with open("log.txt", "w", encoding="utf-8") as log:
            for url in urls:
                analyze_repo(url, log, файлДляНезаказанныхЯзыков)
|
||||
|
||||
main()
|
||||
|
||||
print(f"Другие незамеченные языки: {найденныеЯзыкиКоторыеМыНеЗаказывали}")
|
||||
with open("NotFound.txt", "w") as log:
|
||||
log.write(f'Другие незамеченные языки: {найденныеЯзыкиКоторыеМыНеЗаказывали} \n')
|
||||
|
|
Loading…
Reference in New Issue